From 7decbc67a152250e56cdfd204c302c11c5998a5e Mon Sep 17 00:00:00 2001 From: "Narlo, Joseph" Date: Mon, 6 Oct 2025 14:50:00 -0500 Subject: [PATCH] [SWDEV-539078] Add missing API definitions to python interface (#525) Added the following APIs to amdsmi_interface.py. amdsmi_get_cpu_handle() amdsmi_get_esmi_err_msg() amdsmi_get_gpu_event_notification() amdsmi_get_processor_count_from_handles() amdsmi_get_processor_handles_by_type() amdsmi_gpu_validate_ras_eeprom() amdsmi_init_gpu_event_notification() amdsmi_set_gpu_event_notification_mask() amdsmi_stop_gpu_event_notification() amdsmi_get_gpu_busy_percent() Added additional return value to API amdsmi_get_xgmi_plpd(). The entry policies is added to the end of the dictionary to match API definition. The entry plpds is marked for deprecation as it has the same information as policies. --------- Signed-off-by: josnarlo Signed-off-by: Maisam Arif --- CHANGELOG.md | 19 + amdsmi_cli/amdsmi_commands.py | 54 +- amdsmi_cli/amdsmi_helpers.py | 2 +- docs/reference/amdsmi-py-api.md | 2 +- py-interface/__init__.py | 18 +- py-interface/amdsmi_interface.py | 915 ++++-- src/amd_smi/amd_smi.cc | 6 +- tests/python_unittest/unit_tests.py | 4707 ++++++++++++--------------- 8 files changed, 2662 insertions(+), 3061 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 820a422dbc..5a0b388133 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,22 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Added +- **Added the following APIs to amdsmi_interface.py**. 
+ - amdsmi_get_cpu_handle() + - amdsmi_get_esmi_err_msg() + - amdsmi_get_gpu_event_notification() + - amdsmi_get_processor_count_from_handles() + - amdsmi_get_processor_handles_by_type() + - amdsmi_gpu_validate_ras_eeprom() + - amdsmi_init_gpu_event_notification() + - amdsmi_set_gpu_event_notification_mask() + - amdsmi_stop_gpu_event_notification() + - amdsmi_get_gpu_busy_percent() + +- **Added additional return value to API amdsmi_get_xgmi_plpd()**. + - The entry `policies` is added to the end of the dictionary to match the API definition. + - The entry `plpds` is marked for deprecation as it has the same information as `policies`. + - **Added `amdsmi_get_gpu_revision()` to Python API** - This function retrieves the GPU revision ID. Available in `amdsmi_interface.py` as `amdsmi_get_gpu_revision()`. @@ -190,6 +206,9 @@ GPU: 0 ### Resolved Issues +- **Fixed an issue where amdsmi_get_gpu_od_volt_info() returned a reference to a Python object**. + - The returned dictionary was changed to return values in all fields. + - **Fixed an issue where using `amd-smi ras --folder ` was forcing the created folder's name to be lowercase**. - This fix also allows all string input options to be case insensitive. 
diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 776cf04924..88aa580fcd 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -99,6 +99,19 @@ class AMDSMICommands(): logging.error('Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)') exit_flag = True + self.convert_clock_type = { + "sys": amdsmi_interface.AmdSmiClkType.SYS, + "mem": amdsmi_interface.AmdSmiClkType.MEM, + "df": amdsmi_interface.AmdSmiClkType.DF, + "soc": amdsmi_interface.AmdSmiClkType.SOC, + "dcef": amdsmi_interface.AmdSmiClkType.DCEF, + # vclk and dclk currently do not support levels so average clk is given for frequency levels + "vclk0": amdsmi_interface.AmdSmiClkType.VCLK0, + "vclk1": amdsmi_interface.AmdSmiClkType.VCLK1, + "dclk0": amdsmi_interface.AmdSmiClkType.DCLK0, + "dclk1": amdsmi_interface.AmdSmiClkType.DCLK1 + } + if exit_flag: version_args = argparse.Namespace() version_args.gpu_version = False @@ -1041,28 +1054,9 @@ class AMDSMICommands(): for clk in list(clk_dict.keys()): if clk not in args.clock: del clk_dict[clk] - for clk in args.clock: - clk_type = clk.lower() - if clk_type == "sys": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.SYS - elif clk_type == "mem": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.MEM - elif clk_type == "df": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.DF - elif clk_type == "soc": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.SOC - elif clk_type == "dcef": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCEF - # vclk and dclk currently do not support levels so average clk is given for frequency levels - elif clk_type == "vclk0": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK0 - elif clk_type == "vclk1": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK1 - elif clk_type == "dclk0": - clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK0 - elif clk_type == "dclk1": - 
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK1 + if clk in self.convert_clock_type: + clk_type_conversion = self.convert_clock_type[clk] else: clk_type_conversion = "N/A" output_format = self.helpers.get_output_format() @@ -2471,10 +2465,10 @@ class AMDSMICommands(): # Populate voltage point values for point in range(amdsmi_interface.AMDSMI_NUM_VOLTAGE_CURVE_POINTS): if isinstance(od_volt, dict): - logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point].frequency}") - logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point].voltage}") - frequency = int(od_volt["curve.vc_points"][point].frequency / 1000000) - voltage = int(od_volt["curve.vc_points"][point].voltage) + logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point]['frequency']}") + logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point]['voltage']}") + frequency = int(od_volt["curve.vc_points"][point]['frequency'] / 1000000) + voltage = int(od_volt["curve.vc_points"][point]['voltage']) else: frequency = "N/A" voltage = "N/A" @@ -4875,6 +4869,11 @@ class AMDSMICommands(): return else: # For non-pcie clocks + if clk_type in self.convert_clock_type: + clk_type_conversion = self.convert_clock_type[clk_type] + else: + clk_type_conversion = "N/A" + try: amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clk_type, freq_bitmask) results_clk_lvl['set_clock'] = f"Successfully set {clk_type} perf level(s) to {perf_levels_str}" @@ -4959,6 +4958,7 @@ class AMDSMICommands(): clk_tuple = amdsmi_interface.amdsmi_get_clock_info(args.gpu, amdsmi_clk_type) if lim_type == "min": + amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MIN if val > clk_tuple['max_clk']: self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} min value greater than max ({clk_tuple['max_clk']}MHz)") self.logger.print_output() @@ -4967,8 +4967,8 @@ class AMDSMICommands(): if val == clk_tuple['min_clk']: val_changed = False # Clock limit 
value did not changed - - if lim_type == "max": + elif lim_type == "max": + amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MAX if val < clk_tuple['min_clk']: self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} max value less than min ({clk_tuple['min_clk']}MHz)") self.logger.print_output() diff --git a/amdsmi_cli/amdsmi_helpers.py b/amdsmi_cli/amdsmi_helpers.py index a90cb3cb05..518b8f9a5f 100755 --- a/amdsmi_cli/amdsmi_helpers.py +++ b/amdsmi_cli/amdsmi_helpers.py @@ -857,7 +857,7 @@ class AMDSMIHelpers(): xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(dev) except amdsmi_interface.AmdSmiLibraryException as e: continue - for policy in xgmi_plpd_info['plpds']: + for policy in xgmi_plpd_info['policies']: policy_string = f"{policy['policy_id']}: {policy['policy_description']}" if not policy_string in xgmi_plpd_profile_list: xgmi_plpd_profile_list.append(policy_string) diff --git a/docs/reference/amdsmi-py-api.md b/docs/reference/amdsmi-py-api.md index b70962ae2f..bfdf4e1443 100644 --- a/docs/reference/amdsmi-py-api.md +++ b/docs/reference/amdsmi-py-api.md @@ -3325,7 +3325,7 @@ Field | Description ---|--- `num_supported` | The number of supported policies `current_id` | The current policy index -`plpds` | List of policies. +`policies` | List of policies. 
(`plpds` marked for deprecation in next major release) Exceptions that can be thrown by `amdsmi_get_xgmi_plpd` function: diff --git a/py-interface/__init__.py b/py-interface/__init__.py index 57e3b322f6..f9d2bf84c7 100644 --- a/py-interface/__init__.py +++ b/py-interface/__init__.py @@ -29,6 +29,8 @@ from .amdsmi_interface import amdsmi_get_processor_type from .amdsmi_interface import amdsmi_get_processor_handles from .amdsmi_interface import amdsmi_get_socket_handles from .amdsmi_interface import amdsmi_get_socket_info +from .amdsmi_interface import amdsmi_get_processor_count_from_handles +from .amdsmi_interface import amdsmi_get_processor_handles_by_type # ESMI Dependent Functions try: @@ -78,6 +80,7 @@ try: from .amdsmi_interface import amdsmi_get_cpu_family from .amdsmi_interface import amdsmi_get_cpu_model from .amdsmi_interface import amdsmi_get_cpu_model_name + from .amdsmi_interface import amdsmi_get_cpu_handles except AttributeError: pass @@ -101,6 +104,7 @@ from .amdsmi_interface import amdsmi_get_power_cap_info from .amdsmi_interface import amdsmi_get_gpu_vram_info from .amdsmi_interface import amdsmi_get_gpu_cache_info from .amdsmi_interface import amdsmi_get_gpu_xcd_counter +from .amdsmi_interface import amdsmi_get_gpu_revision # # Microcode and VBIOS Information from .amdsmi_interface import amdsmi_get_gpu_vbios_info @@ -111,13 +115,19 @@ from .amdsmi_interface import amdsmi_get_gpu_activity from .amdsmi_interface import amdsmi_get_gpu_vram_usage from .amdsmi_interface import amdsmi_get_power_info from .amdsmi_interface import amdsmi_get_clock_info +from .amdsmi_interface import amdsmi_get_gpu_busy_percent from .amdsmi_interface import amdsmi_get_pcie_info from .amdsmi_interface import amdsmi_get_gpu_bad_page_info from .amdsmi_interface import amdsmi_get_gpu_bad_page_threshold from .amdsmi_interface import amdsmi_get_violation_status from .amdsmi_interface import amdsmi_get_gpu_xgmi_link_status -from .amdsmi_interface import amdsmi_get_gpu_revision + 
+# # Event Notification +from .amdsmi_interface import amdsmi_init_gpu_event_notification +from .amdsmi_interface import amdsmi_set_gpu_event_notification_mask +from .amdsmi_interface import amdsmi_get_gpu_event_notification +from .amdsmi_interface import amdsmi_stop_gpu_event_notification # # Process Information from .amdsmi_interface import amdsmi_get_gpu_process_list @@ -132,6 +142,7 @@ from .amdsmi_interface import amdsmi_get_gpu_board_info from .amdsmi_interface import amdsmi_get_gpu_ras_feature_info from .amdsmi_interface import amdsmi_get_gpu_ras_block_features_enabled from .amdsmi_interface import amdsmi_get_gpu_cper_entries +from .amdsmi_interface import amdsmi_gpu_validate_ras_eeprom # # Unsupported Functions In Virtual Environment from .amdsmi_interface import amdsmi_set_gpu_pci_bandwidth @@ -150,9 +161,12 @@ from .amdsmi_interface import amdsmi_set_gpu_fan_speed from .amdsmi_interface import amdsmi_reset_gpu_fan from .amdsmi_interface import amdsmi_set_clk_freq from .amdsmi_interface import amdsmi_set_gpu_overdrive_level +from .amdsmi_interface import amdsmi_get_soc_pstate from .amdsmi_interface import amdsmi_set_soc_pstate from .amdsmi_interface import amdsmi_set_xgmi_plpd +from .amdsmi_interface import amdsmi_get_xgmi_plpd from .amdsmi_interface import amdsmi_clean_gpu_local_data +from .amdsmi_interface import amdsmi_get_gpu_process_isolation from .amdsmi_interface import amdsmi_set_gpu_process_isolation # # Physical State Queries @@ -193,6 +207,7 @@ from .amdsmi_interface import amdsmi_get_gpu_compute_process_info_by_pid from .amdsmi_interface import amdsmi_get_gpu_compute_process_gpus from .amdsmi_interface import amdsmi_gpu_xgmi_error_status from .amdsmi_interface import amdsmi_reset_gpu_xgmi_error +from .amdsmi_interface import amdsmi_get_esmi_err_msg # # PCIE information from .amdsmi_interface import amdsmi_get_gpu_bdf_id @@ -255,6 +270,7 @@ from .amdsmi_interface import amdsmi_get_lib_version from .amdsmi_interface import amdsmi_get_rocm_version 
# # Enums +from .amdsmi_interface import AmdSmiStatus from .amdsmi_interface import AmdSmiInitFlags from .amdsmi_interface import AmdSmiContainerTypes from .amdsmi_interface import AmdSmiDeviceType diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 49e2a729f5..ee0e18bb5b 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -42,7 +42,9 @@ class MaxUIntegerTypes(IntEnum): NO_OF_32BITS = (sys.getsizeof(ctypes.c_uint32) * 8) NO_OF_64BITS = (sys.getsizeof(ctypes.c_uint64) * 8) KILO = math.pow(10, 3) -processor_handle = c_void_p + +socket_handle_t = c_void_p +processor_handle_t = c_void_p ############################### MAX_NUM_PROCESSES = 1024 @@ -84,6 +86,54 @@ AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK = 64 AMDSMI_GPU_UUID_SIZE = 38 _AMDSMI_STRING_LENGTH = 80 +class AmdSmiStatus(IntEnum): + SUCCESS = amdsmi_wrapper.AMDSMI_STATUS_SUCCESS + INVAL = amdsmi_wrapper.AMDSMI_STATUS_INVAL + NOT_SUPPORTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED + NOT_YET_IMPLEMENTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_YET_IMPLEMENTED + FAIL_LOAD_MODULE = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_MODULE + FAIL_LOAD_SYMBOL = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_SYMBOL + DRM_ERROR = amdsmi_wrapper.AMDSMI_STATUS_DRM_ERROR + API_FAILED = amdsmi_wrapper.AMDSMI_STATUS_API_FAILED + TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT + RETRY = amdsmi_wrapper.AMDSMI_STATUS_RETRY + NO_PERM = amdsmi_wrapper.AMDSMI_STATUS_NO_PERM + INTERRUPT = amdsmi_wrapper.AMDSMI_STATUS_INTERRUPT + IO = amdsmi_wrapper.AMDSMI_STATUS_IO + ADDRESS_FAULT = amdsmi_wrapper.AMDSMI_STATUS_ADDRESS_FAULT + FILE_ERROR = amdsmi_wrapper.AMDSMI_STATUS_FILE_ERROR + OUT_OF_RESOURCES = amdsmi_wrapper.AMDSMI_STATUS_OUT_OF_RESOURCES + INTERNAL_EXCEPTION = amdsmi_wrapper.AMDSMI_STATUS_INTERNAL_EXCEPTION + INPUT_OUT_OF_BOUNDS = amdsmi_wrapper.AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS + INIT_ERROR = amdsmi_wrapper.AMDSMI_STATUS_INIT_ERROR + REFCOUNT_OVERFLOW = 
amdsmi_wrapper.AMDSMI_STATUS_REFCOUNT_OVERFLOW + DIRECTORY_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_DIRECTORY_NOT_FOUND + BUSY = amdsmi_wrapper.AMDSMI_STATUS_BUSY + NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_NOT_FOUND + NOT_INIT = amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT + NO_SLOT = amdsmi_wrapper.AMDSMI_STATUS_NO_SLOT + DRIVER_NOT_LOADED = amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED + MORE_DATA = amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA + NO_DATA = amdsmi_wrapper.AMDSMI_STATUS_NO_DATA + INSUFFICIENT_SIZE = amdsmi_wrapper.AMDSMI_STATUS_INSUFFICIENT_SIZE + UNEXPECTED_SIZE = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_SIZE + UNEXPECTED_DATA = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_DATA + NON_AMD_CPU = amdsmi_wrapper.AMDSMI_STATUS_NON_AMD_CPU + NO_ENERGY_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_ENERGY_DRV + NO_MSR_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_MSR_DRV + NO_HSMP_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_DRV + NO_HSMP_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_SUP + NO_HSMP_MSG_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_MSG_SUP + HSMP_TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_HSMP_TIMEOUT + NO_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_DRV + FILE_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_FILE_NOT_FOUND + ARG_PTR_NULL = amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL + AMDGPU_RESTART_ERR = amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR + SETTING_UNAVAILABLE = amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE + CORRUPTED_EEPROM = amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM + MAP_ERROR = amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR + UNKNOWN_ERROR = amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR + class AmdSmiInitFlags(IntEnum): INIT_ALL_PROCESSORS = amdsmi_wrapper.AMDSMI_INIT_ALL_PROCESSORS @@ -507,10 +557,12 @@ class AmdSmiUtilizationCounterType(IntEnum): class AmdSmiProcessorType(IntEnum): UNKNOWN = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_UNKNOWN - AMDSMI_PROCESSOR_TYPE_AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU - AMDSMI_PROCESSOR_TYPE_AMD_CPU = 
amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU - AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU - AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU + AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU + AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU + NON_AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU + NON_AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU + AMD_CPU_CORE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE + AMD_APU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_APU class AmdSmiRegType(IntEnum): @@ -556,7 +608,7 @@ class AmdSmiAffinityScope(IntEnum): class AmdSmiEventReader: def __init__( self, - processor_handle: processor_handle, + processor_handle: processor_handle_t, event_types: List[AmdSmiEvtNotificationType] ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -586,26 +638,26 @@ class AmdSmiEventReader: def read(self, timestamp, num_elem=10): c_count = ctypes.c_uint32(num_elem) - self.event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)() + event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)() _check_res( amdsmi_wrapper.amdsmi_get_gpu_event_notification( ctypes.c_int(timestamp), ctypes.byref(c_count), - self.event_info, + event_info, ) ) ret = [] for i in range(c_count.value): unique_event_values = set(event.value for event in AmdSmiEvtNotificationType) - if self.event_info[i].event in unique_event_values: - if AmdSmiEvtNotificationType(self.event_info[i].event).name != "NONE": - processor_handle = amdsmi_wrapper.amdsmi_processor_handle(self.event_info[i].processor_handle) + if event_info[i].event in unique_event_values: + if AmdSmiEvtNotificationType(event_info[i].event).name != "NONE": + processor_handle = amdsmi_wrapper.amdsmi_processor_handle(event_info[i].processor_handle) ret.append( { "processor_handle": processor_handle, - "event": 
AmdSmiEvtNotificationType(self.event_info[i].event).name, - "message": self.event_info[i].message.decode("utf-8"), + "event": AmdSmiEvtNotificationType(event_info[i].event).name, + "message": event_info[i].message.decode("utf-8"), } ) @@ -732,7 +784,7 @@ def _make_amdsmi_bdf_from_list(bdf): return amdsmi_bdf -def _pad_hex_value(value, length): +def _pad_hex_value(value, length) -> str: """ Pad a hexadecimal value with a given length of zeros :param value: A hexadecimal value to be padded with zeros @@ -748,16 +800,14 @@ def _pad_hex_value(value, length): return value -def _validate_if_max_uint(value, uint_type: MaxUIntegerTypes, isActivity=False, isBool=False) -> Union[str, bool, int]: +def _validate_if_max_uint(value, uint_type: MaxUIntegerTypes, isActivity=False, isBool=False) -> Union[str, bool, int, list]: return_val = "N/A" if not isinstance(value, list): if (value == uint_type) or (isActivity and value > 100): return return_val - else: - if isBool: - return bool(value) - else: - return value + if isBool: + return bool(value) + return value else: return_val = [] for _, v in enumerate(value): @@ -767,8 +817,7 @@ def _validate_if_max_uint(value, uint_type: MaxUIntegerTypes, isActivity=False, return_val.append(v) if isBool: return bool(return_val) - else: - return return_val + return return_val def _notifyTypeToString(notify_type_b): @@ -958,7 +1007,6 @@ def amdsmi_get_processor_info(processor_handle): processor_handle, amdsmi_wrapper.amdsmi_processor_handle) processor_info = ctypes.create_string_buffer(128) - core_id = ctypes.c_uint32() _check_res( amdsmi_wrapper.amdsmi_get_processor_info( processor_handle, ctypes.c_size_t(128), processor_info) @@ -1017,7 +1065,7 @@ def amdsmi_get_cpucore_handles() -> List[c_void_p]: return core_handles -def amdsmi_get_cpu_hsmp_proto_ver(processor_handle: processor_handle) -> int: +def amdsmi_get_cpu_hsmp_proto_ver(processor_handle: processor_handle_t) -> int: if not isinstance(processor_handle, 
amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1033,7 +1081,7 @@ def amdsmi_get_cpu_hsmp_proto_ver(processor_handle: processor_handle) -> int: return proto_ver.value def amdsmi_get_cpu_smu_fw_version( - processor_handle: processor_handle) -> Dict[str, int]: + processor_handle: processor_handle_t) -> Dict[str, int]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1050,7 +1098,7 @@ def amdsmi_get_cpu_smu_fw_version( } def amdsmi_get_cpu_hsmp_driver_version( - processor_handle: processor_handle) -> Dict[str, int]: + processor_handle: processor_handle_t) -> Dict[str, int]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1066,8 +1114,8 @@ def amdsmi_get_cpu_hsmp_driver_version( } def amdsmi_get_cpu_core_energy( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1083,8 +1131,8 @@ def amdsmi_get_cpu_core_energy( return f"{float(penergy.value * pow(10, -6))} J" def amdsmi_get_cpu_socket_energy( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1110,7 +1158,7 @@ def amdsmi_get_threads_per_core(): return threads_per_core.value def amdsmi_get_cpu_prochot_status( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): 
raise AmdSmiParameterException( @@ -1127,7 +1175,7 @@ def amdsmi_get_cpu_prochot_status( return prochot.value def amdsmi_get_cpu_fclk_mclk( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1148,8 +1196,8 @@ def amdsmi_get_cpu_fclk_mclk( } def amdsmi_get_cpu_cclk_limit( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1165,7 +1213,7 @@ def amdsmi_get_cpu_cclk_limit( return f"{cclk.value} MHz" def amdsmi_get_cpu_socket_current_active_freq_limit( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1193,7 +1241,7 @@ def amdsmi_get_cpu_socket_current_active_freq_limit( } def amdsmi_get_cpu_socket_freq_range( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1214,8 +1262,8 @@ def amdsmi_get_cpu_socket_freq_range( } def amdsmi_get_cpu_core_current_freq_limit( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1231,8 +1279,8 @@ def amdsmi_get_cpu_core_current_freq_limit( return f"{freq.value} MHz" def amdsmi_get_cpu_socket_power( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise 
AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1248,7 +1296,7 @@ def amdsmi_get_cpu_socket_power( return f"{ppower.value} mW" def amdsmi_get_cpu_socket_power_cap( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1262,11 +1310,12 @@ def amdsmi_get_cpu_socket_power_cap( ) ) - return f"{pcap.value} mW" + # in mW + return pcap.value def amdsmi_get_cpu_socket_power_cap_max( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1282,8 +1331,8 @@ def amdsmi_get_cpu_socket_power_cap_max( return f"{pmax.value} mW" def amdsmi_get_cpu_pwr_svi_telemetry_all_rails( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1299,7 +1348,7 @@ def amdsmi_get_cpu_pwr_svi_telemetry_all_rails( return f"{power.value} mW" def amdsmi_set_cpu_socket_power_cap( - processor_handle: processor_handle, power_cap: int + processor_handle: processor_handle_t, power_cap: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1308,15 +1357,15 @@ def amdsmi_set_cpu_socket_power_cap( if not isinstance(power_cap, int): raise AmdSmiParameterException(power_cap, int) - power_cap = ctypes.c_uint32(power_cap) + power_cap_32 = ctypes.c_uint32(power_cap) _check_res( amdsmi_wrapper.amdsmi_set_cpu_socket_power_cap( - processor_handle, power_cap) + processor_handle, power_cap_32) ) def amdsmi_set_cpu_pwr_efficiency_mode( - processor_handle: 
processor_handle, mode: int + processor_handle: processor_handle_t, mode: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1324,15 +1373,15 @@ def amdsmi_set_cpu_pwr_efficiency_mode( ) if not isinstance(mode, int): raise AmdSmiParameterException(mode, int) - mode = ctypes.c_uint8(mode) + mode_8 = ctypes.c_uint8(mode) _check_res( amdsmi_wrapper.amdsmi_set_cpu_pwr_efficiency_mode( - processor_handle, mode) + processor_handle, mode_8) ) def amdsmi_get_cpu_core_boostlimit( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1346,11 +1395,12 @@ def amdsmi_get_cpu_core_boostlimit( ) ) - return f"{boostlimit.value} MHz" + # In MHz" + return boostlimit.value def amdsmi_get_cpu_socket_c0_residency( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1366,7 +1416,7 @@ def amdsmi_get_cpu_socket_c0_residency( return f"{c0_residency.value} %" def amdsmi_set_cpu_core_boostlimit( - processor_handle: processor_handle, boostlimit: int + processor_handle: processor_handle_t, boostlimit: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1374,15 +1424,15 @@ def amdsmi_set_cpu_core_boostlimit( ) if not isinstance(boostlimit, int): raise AmdSmiParameterException(boostlimit, int) - boostlimit = ctypes.c_uint32(boostlimit) + boostlimit_32 = ctypes.c_uint32(boostlimit) _check_res( amdsmi_wrapper.amdsmi_set_cpu_core_boostlimit( - processor_handle, boostlimit) + processor_handle, boostlimit_32) ) def amdsmi_set_cpu_socket_boostlimit( - processor_handle: processor_handle, boostlimit: int + 
processor_handle: processor_handle_t, boostlimit: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1390,14 +1440,14 @@ def amdsmi_set_cpu_socket_boostlimit( ) if not isinstance(boostlimit, int): raise AmdSmiParameterException(boostlimit, int) - boostlimit = ctypes.c_uint32(boostlimit) + boostlimit_32 = ctypes.c_uint32(boostlimit) _check_res( amdsmi_wrapper.amdsmi_set_cpu_socket_boostlimit( - processor_handle, boostlimit) + processor_handle, boostlimit_32) ) -def amdsmi_get_cpu_ddr_bw(processor_handle: processor_handle): +def amdsmi_get_cpu_ddr_bw(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1414,8 +1464,8 @@ def amdsmi_get_cpu_ddr_bw(processor_handle: processor_handle): } def amdsmi_get_cpu_socket_temperature( - processor_handle: processor_handle -) -> int: + processor_handle: processor_handle_t +) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1432,7 +1482,7 @@ def amdsmi_get_cpu_socket_temperature( return f"{ptmon.value} Degrees C" def amdsmi_get_cpu_dimm_temp_range_and_refresh_rate( - processor_handle: processor_handle, + processor_handle: processor_handle_t, dimm_addr: int): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1441,11 +1491,11 @@ def amdsmi_get_cpu_dimm_temp_range_and_refresh_rate( if not isinstance(dimm_addr, int): raise AmdSmiParameterException(dimm_addr, int) - dimm_addr = ctypes.c_uint8(dimm_addr) + dimm_addr_8 = ctypes.c_uint8(dimm_addr) dimm = amdsmi_wrapper.amdsmi_temp_range_refresh_rate_t() _check_res(amdsmi_wrapper.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(processor_handle, - dimm_addr, + dimm_addr_8, 
ctypes.byref(dimm))) return { @@ -1454,7 +1504,7 @@ def amdsmi_get_cpu_dimm_temp_range_and_refresh_rate( } def amdsmi_get_cpu_dimm_power_consumption( - processor_handle: processor_handle, + processor_handle: processor_handle_t, dimm_addr: int): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1463,11 +1513,11 @@ def amdsmi_get_cpu_dimm_power_consumption( if not isinstance(dimm_addr, int): raise AmdSmiParameterException(dimm_addr, int) - dimm_addr = ctypes.c_uint8(dimm_addr) + dimm_addr_8 = ctypes.c_uint8(dimm_addr) dimm = amdsmi_wrapper.amdsmi_dimm_power_t() _check_res(amdsmi_wrapper.amdsmi_get_cpu_dimm_power_consumption(processor_handle, - dimm_addr, + dimm_addr_8, ctypes.byref(dimm))) return { @@ -1477,7 +1527,7 @@ def amdsmi_get_cpu_dimm_power_consumption( } def amdsmi_get_cpu_dimm_thermal_sensor( - processor_handle: processor_handle, + processor_handle: processor_handle_t, dimm_addr: int): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1486,11 +1536,11 @@ def amdsmi_get_cpu_dimm_thermal_sensor( if not isinstance(dimm_addr, int): raise AmdSmiParameterException(dimm_addr, int) - dimm_addr = ctypes.c_uint8(dimm_addr) + dimm_addr_8 = ctypes.c_uint8(dimm_addr) dimm_thermal = amdsmi_wrapper.amdsmi_dimm_thermal_t() _check_res(amdsmi_wrapper.amdsmi_get_cpu_dimm_thermal_sensor(processor_handle, - dimm_addr, + dimm_addr_8, ctypes.byref(dimm_thermal))) return { @@ -1501,7 +1551,7 @@ def amdsmi_get_cpu_dimm_thermal_sensor( } def amdsmi_set_cpu_xgmi_width( - processor_handle: processor_handle, min_width: int, max_width: int + processor_handle: processor_handle_t, min_width: int, max_width: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1512,16 +1562,16 @@ def amdsmi_set_cpu_xgmi_width( if not isinstance(max_width, int): raise AmdSmiParameterException(max_width, int) - 
min_width = ctypes.c_uint8(min_width) - max_width = ctypes.c_uint8(max_width) + min_width_8 = ctypes.c_uint8(min_width) + max_width_8 = ctypes.c_uint8(max_width) _check_res( amdsmi_wrapper.amdsmi_set_cpu_xgmi_width( - processor_handle, min_width, max_width) + processor_handle, min_width_8, max_width_8) ) def amdsmi_set_cpu_gmi3_link_width_range( - processor_handle: processor_handle, + processor_handle: processor_handle_t, min_link_width: int, max_link_width: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1533,16 +1583,16 @@ def amdsmi_set_cpu_gmi3_link_width_range( if not isinstance(max_link_width, int): raise AmdSmiParameterException(max_link_width, int) - min_link_width = ctypes.c_uint8(min_link_width) - max_link_width = ctypes.c_uint8(max_link_width) + min_link_width_8 = ctypes.c_uint8(min_link_width) + max_link_width_8 = ctypes.c_uint8(max_link_width) _check_res( amdsmi_wrapper.amdsmi_set_cpu_gmi3_link_width_range( - processor_handle, min_link_width, max_link_width) + processor_handle, min_link_width_8, max_link_width_8) ) def amdsmi_cpu_apb_enable( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1554,7 +1604,7 @@ def amdsmi_cpu_apb_enable( ) def amdsmi_cpu_apb_disable( - processor_handle: processor_handle, + processor_handle: processor_handle_t, pstate: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1564,15 +1614,15 @@ def amdsmi_cpu_apb_disable( if not isinstance(pstate, int): raise AmdSmiParameterException(pstate, int) - pstate = ctypes.c_uint8(pstate) + pstate_8 = ctypes.c_uint8(pstate) _check_res( amdsmi_wrapper.amdsmi_cpu_apb_disable( - processor_handle, pstate) + processor_handle, pstate_8) ) def amdsmi_set_cpu_socket_lclk_dpm_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, nbio_id: int, min_val: 
int, max_val: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1586,17 +1636,17 @@ def amdsmi_set_cpu_socket_lclk_dpm_level( if not isinstance(max_val, int): raise AmdSmiParameterException(max_val, int) - nbio_id = ctypes.c_uint8(nbio_id) - min_val = ctypes.c_uint8(min_val) - max_val = ctypes.c_uint8(max_val) + nbio_id_8 = ctypes.c_uint8(nbio_id) + min_val_8 = ctypes.c_uint8(min_val) + max_val_8 = ctypes.c_uint8(max_val) _check_res( amdsmi_wrapper.amdsmi_set_cpu_socket_lclk_dpm_level( - processor_handle, nbio_id, min_val, max_val) + processor_handle, nbio_id_8, min_val_8, max_val_8) ) def amdsmi_get_cpu_socket_lclk_dpm_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, nbio_id: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1606,10 +1656,10 @@ def amdsmi_get_cpu_socket_lclk_dpm_level( if not isinstance(nbio_id, int): raise AmdSmiParameterException(nbio_id, int) - nbio_id = ctypes.c_uint8(nbio_id) + nbio_id_8 = ctypes.c_uint8(nbio_id) dpm_level = amdsmi_wrapper.amdsmi_dpm_level_t() - _check_res(amdsmi_wrapper.amdsmi_get_cpu_socket_lclk_dpm_level(processor_handle, nbio_id, dpm_level)) + _check_res(amdsmi_wrapper.amdsmi_get_cpu_socket_lclk_dpm_level(processor_handle, nbio_id_8, dpm_level)) return { "nbio_max_dpm_level": dpm_level.max_dpm_level, @@ -1617,7 +1667,7 @@ def amdsmi_get_cpu_socket_lclk_dpm_level( } def amdsmi_set_cpu_pcie_link_rate( - processor_handle: processor_handle, + processor_handle: processor_handle_t, rate_ctrl: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1627,18 +1677,18 @@ def amdsmi_set_cpu_pcie_link_rate( if not isinstance(rate_ctrl, int): raise AmdSmiParameterException(rate_ctrl, int) - rate_ctrl = ctypes.c_uint8(rate_ctrl) - prev_mode = ctypes.c_uint8() + rate_ctrl_8 = ctypes.c_uint8(rate_ctrl) + prev_mode_8 = ctypes.c_uint8() _check_res( amdsmi_wrapper.amdsmi_set_cpu_pcie_link_rate( - 
processor_handle, rate_ctrl, ctypes.byref(prev_mode)) + processor_handle, rate_ctrl_8, ctypes.byref(prev_mode_8)) ) - return f"{prev_mode.value}" + return f"{prev_mode_8.value}" def amdsmi_set_cpu_df_pstate_range( - processor_handle: processor_handle, + processor_handle: processor_handle_t, max_pstate: int, min_pstate: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -1650,15 +1700,15 @@ def amdsmi_set_cpu_df_pstate_range( if not isinstance(min_pstate, int): raise AmdSmiParameterException(min_pstate, int) - max_pstate = ctypes.c_uint8(max_pstate) - min_pstate = ctypes.c_uint8(min_pstate) + max_pstate_8 = ctypes.c_uint8(max_pstate) + min_pstate_8 = ctypes.c_uint8(min_pstate) _check_res( amdsmi_wrapper.amdsmi_set_cpu_df_pstate_range( - processor_handle, max_pstate, min_pstate)) + processor_handle, max_pstate_8, min_pstate_8)) def amdsmi_get_cpu_current_io_bandwidth( - processor_handle: processor_handle, + processor_handle: processor_handle_t, encoding: int, link_name: str ): @@ -1680,7 +1730,7 @@ def amdsmi_get_cpu_current_io_bandwidth( return f"{io_bw.value} Mbps" def amdsmi_get_cpu_current_xgmi_bw( - processor_handle: processor_handle, + processor_handle: processor_handle_t, encoding: int, link_name: str ): @@ -1702,7 +1752,7 @@ def amdsmi_get_cpu_current_xgmi_bw( return f"{xgmi_bw.value} Mbps" def amdsmi_get_hsmp_metrics_table_version( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1723,26 +1773,22 @@ def amdsmi_get_hsmp_metrics_table_version( def check_msb_32(num): msb = 1 << (NO_OF_32BITS - 1) - '''If msb = 1 , then take 2's complement of the number''' + # If msb = 1 , then take 2's complement of the number if num & msb: num = ~num + 1 - return num - else: - return num + return num # Get 2's complement of 64 bit unsigned integer def check_msb_64(num): msb = 1 << (NO_OF_64BITS - 1) - 
'''If msb = 1 , then take 2's complement of the number''' + # If msb = 1 , then take 2's complement of the number if num & msb: num = ~num + 1 - return num - else: - return num + return num def amdsmi_get_hsmp_metrics_table( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1751,7 +1797,7 @@ def amdsmi_get_hsmp_metrics_table( mtbl = amdsmi_wrapper.amdsmi_hsmp_metrics_table_t() - '''Encodings for the metric table defined for hsmp''' + # Encodings for the metric table defined for hsmp fraction_q10 = 1 / math.pow(2, 10) fraction_uq10 = fraction_q10 fraction_uq16 = 1 / math.pow(2, 16) @@ -1828,7 +1874,7 @@ def amdsmi_get_hsmp_metrics_table( } def amdsmi_first_online_core_on_cpu_socket( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1859,7 +1905,7 @@ def amdsmi_get_cpu_model(): return model.value def amdsmi_get_cpu_model_name( - processor_handle: processor_handle + processor_handle: processor_handle_t ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -1903,8 +1949,8 @@ def amdsmi_shut_down(): def amdsmi_get_processor_type( - processor_handle: processor_handle, -) -> ctypes.c_uint32: + processor_handle: processor_handle_t, +) -> Dict[str, str]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1921,7 +1967,7 @@ def amdsmi_get_processor_type( } -def amdsmi_get_gpu_device_bdf(processor_handle: processor_handle) -> str: +def amdsmi_get_gpu_device_bdf(processor_handle: processor_handle_t) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( 
processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1936,7 +1982,7 @@ def amdsmi_get_gpu_device_bdf(processor_handle: processor_handle) -> str: return _format_bdf(bdf_info) -def amdsmi_get_gpu_device_uuid(processor_handle: processor_handle) -> str: +def amdsmi_get_gpu_device_uuid(processor_handle: processor_handle_t) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -1956,12 +2002,12 @@ def amdsmi_get_gpu_device_uuid(processor_handle: processor_handle) -> str: return uuid.value.decode("utf-8") -def amdsmi_get_gpu_enumeration_info(processor_handle: processor_handle) -> Dict[str, Any]: +def amdsmi_get_gpu_enumeration_info(processor_handle: processor_handle_t) -> Dict[str, Any]: """ Retrieves GPU enumeration information including DRM card ID, DRM render ID, HIP ID, and HIP UUID. Parameters: - processor_handle (amdsmi_processor_handle): The processor handle. + processor_handle (amdsmi_processor_handle_t): The processor handle. Returns: Dict[str, Any]: A dictionary containing the retrieved enumeration information. 
@@ -1996,7 +2042,7 @@ def amdsmi_get_gpu_enumeration_info(processor_handle: processor_handle) -> Dict[ return enumeration_info def amdsmi_get_cpu_affinity_with_scope( - processor_handle: processor_handle, + processor_handle: processor_handle_t, scope: AmdSmiAffinityScope ) -> List[int]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -2010,22 +2056,21 @@ def amdsmi_get_cpu_affinity_with_scope( socket_count = amdsmi_get_cpu_socket_count() sock_info = amdsmi_get_cpu_cores_per_socket(socket_count) core_count = sock_info['cores_per_socket'] - + size = ctypes.c_uint32(0) size = (socket_count * core_count)/ (ctypes.sizeof(ctypes.c_uint64) * 8) size = int(math.ceil(size)) size = ctypes.c_uint32(size) cpu_set = (ctypes.c_uint64 * size.value)() - + _check_res( amdsmi_wrapper.amdsmi_get_cpu_affinity_with_scope( processor_handle, size, cpu_set, scope) ) - return cpu_set def amdsmi_get_gpu_asic_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2042,9 +2087,9 @@ def amdsmi_get_gpu_asic_info( target_graphics_version = hex(asic_info_struct.target_graphics_version)[2:] subsystem_id = _validate_if_max_uint(asic_info_struct.subsystem_id, MaxUIntegerTypes.UINT32_T) subvendor_id = _validate_if_max_uint(asic_info_struct.subvendor_id, MaxUIntegerTypes.UINT32_T) - if not isinstance(subsystem_id, str): + if isinstance(subsystem_id, int): subsystem_id = _pad_hex_value(hex(subsystem_id), 4) - if not isinstance(subvendor_id, str): + if isinstance(subvendor_id, int): subvendor_id = _pad_hex_value(hex(subvendor_id), 4) asic_info = { "market_name": market_name, @@ -2087,7 +2132,7 @@ def amdsmi_get_gpu_asic_info( def amdsmi_get_gpu_kfd_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, 
amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2110,7 +2155,7 @@ def amdsmi_get_gpu_kfd_info( def amdsmi_get_power_cap_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2132,7 +2177,7 @@ def amdsmi_get_power_cap_info( def amdsmi_get_gpu_pm_metrics_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2144,24 +2189,25 @@ def amdsmi_get_gpu_pm_metrics_info( _check_res( amdsmi_wrapper.amdsmi_get_gpu_pm_metrics_info( - processor_handle, pm_metrics, ctypes.byref(num_mets) + processor_handle, ctypes.byref(pm_metrics), ctypes.byref(num_mets) ) ) results = [] for i in range(num_mets.value): item = { - 'name': pm_metrics[i].name, + 'name': pm_metrics[i].name.decode('utf-8'), 'value': pm_metrics[i].value } results.append(item) + + # Free the allocated memory amdsmi_wrapper.amdsmi_free_name_value_pairs(pm_metrics) return results def amdsmi_get_gpu_reg_table_info( - processor_handle: processor_handle, - reg_type: AmdSmiRegType, + processor_handle: processor_handle_t, reg_type: AmdSmiRegType ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2169,7 +2215,7 @@ def amdsmi_get_gpu_reg_table_info( ) reg_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)() - num_regs = ctypes.c_uint32() + num_regs = ctypes.c_uint32(0) _check_res( amdsmi_wrapper.amdsmi_get_gpu_reg_table_info( @@ -2189,7 +2235,7 @@ def amdsmi_get_gpu_reg_table_info( def amdsmi_get_gpu_vram_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, 
amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2211,7 +2257,7 @@ def amdsmi_get_gpu_vram_info( def amdsmi_get_gpu_xgmi_link_status( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2246,7 +2292,7 @@ def amdsmi_get_gpu_xgmi_link_status( def amdsmi_get_gpu_cache_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, List]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2297,7 +2343,7 @@ def amdsmi_get_gpu_cache_info( def amdsmi_get_gpu_vbios_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2324,7 +2370,7 @@ def amdsmi_get_gpu_vbios_info( def amdsmi_get_gpu_activity( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2352,7 +2398,7 @@ def amdsmi_get_gpu_activity( def amdsmi_get_clock_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, clock_type: AmdSmiClkType, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -2381,7 +2427,7 @@ def amdsmi_get_clock_info( return dict_ret def amdsmi_get_gpu_bad_page_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2399,7 +2445,8 @@ def amdsmi_get_gpu_bad_page_info( if num_pages.value == 0: return [] - bad_pages = (amdsmi_wrapper.amdsmi_retired_page_record_t * 
num_pages.value)() + bad_pages_array_type = amdsmi_wrapper.amdsmi_retired_page_record_t * num_pages.value + bad_pages = bad_pages_array_type() _check_res( amdsmi_wrapper.amdsmi_get_gpu_bad_page_info( processor_handle, ctypes.byref(num_pages), bad_pages @@ -2409,7 +2456,7 @@ def amdsmi_get_gpu_bad_page_info( return _format_bad_page_info(bad_pages, num_pages) def amdsmi_get_gpu_bad_page_threshold( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2426,7 +2473,7 @@ def amdsmi_get_gpu_bad_page_threshold( return threshold.value def amdsmi_get_violation_status( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2554,7 +2601,7 @@ def amdsmi_get_violation_status( return dict_return def amdsmi_get_gpu_total_ecc_count( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2585,7 +2632,7 @@ def notifyTypeToString(notify_type_b): return "".join(guid[::-1]) def amdsmi_get_gpu_cper_entries( - processor_handle: processor_handle, + processor_handle: processor_handle_t, severity_mask: int, buffer_size: int = 4 * 1048576, cursor: int = 0 @@ -2708,7 +2755,7 @@ def amdsmi_get_afids_from_cper( raw_bytes = bytes(record["bytes"]) record_size = record["size"] else: - raise AmdSmiParameterException(record, + raise AmdSmiParameterException(record, "dict with keys 'bytes' and 'size' or bytes/bytearray") # Wrap as char* buf = ctypes.create_string_buffer(raw_bytes, record_size) @@ -2735,7 +2782,7 @@ def amdsmi_get_afids_from_cper( def amdsmi_get_gpu_board_info( - processor_handle: processor_handle, + processor_handle: 
processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2763,7 +2810,7 @@ def amdsmi_get_gpu_board_info( def amdsmi_get_gpu_ras_feature_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2788,7 +2835,7 @@ def amdsmi_get_gpu_ras_feature_info( def amdsmi_get_gpu_ras_block_features_enabled( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2800,8 +2847,9 @@ def amdsmi_get_gpu_ras_block_features_enabled( for gpu_block in AmdSmiGpuBlock: if gpu_block.name == "RESERVED" or gpu_block.name == "INVALID": continue + gpu_block_name = gpu_block.name if gpu_block.name == "LAST": - gpu_block.name = "MPIO" + gpu_block_name = "MPIO" _check_res( amdsmi_wrapper.amdsmi_get_gpu_ras_block_features_enabled( processor_handle, @@ -2811,7 +2859,7 @@ def amdsmi_get_gpu_ras_block_features_enabled( ) ras_states.append( { - "block": gpu_block.name, + "block": gpu_block_name, "status": AmdSmiRasErrState(ras_state.value).name, } ) @@ -2820,7 +2868,7 @@ def amdsmi_get_gpu_ras_block_features_enabled( def amdsmi_get_gpu_process_list( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> List[amdsmi_wrapper.amdsmi_proc_info_t]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2862,7 +2910,7 @@ def amdsmi_get_gpu_process_list( def amdsmi_get_gpu_driver_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2890,7 +2938,7 @@ 
def amdsmi_get_gpu_driver_info( def amdsmi_get_power_info( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> Dict[str, ctypes.c_uint32]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -2922,7 +2970,7 @@ def amdsmi_get_power_info( def amdsmi_is_gpu_power_management_enabled( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> bool: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException(processor_handle, amdsmi_wrapper.amdsmi_processor_handle) @@ -2938,7 +2986,7 @@ def amdsmi_is_gpu_power_management_enabled( def amdsmi_get_fw_info( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> Dict[str, List[Dict[str, str]]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3002,7 +3050,7 @@ def amdsmi_get_fw_info( def amdsmi_get_gpu_vram_usage( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3019,7 +3067,7 @@ def amdsmi_get_gpu_vram_usage( def amdsmi_get_pcie_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3065,7 +3113,7 @@ def amdsmi_get_pcie_info( return pcie_info_dict -def amdsmi_get_gpu_xcd_counter(processor_handle: processor_handle) -> int: +def amdsmi_get_gpu_xcd_counter(processor_handle: processor_handle_t) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException(processor_handle, amdsmi_wrapper.amdsmi_processor_handle) @@ -3090,7 +3138,7 @@ def amdsmi_get_processor_handle_from_bdf(bdf): def amdsmi_get_gpu_vendor_name( 
- processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3110,20 +3158,20 @@ def amdsmi_get_gpu_vendor_name( return vendor_name.value.decode("utf-8") -def amdsmi_get_gpu_id(processor_handle: processor_handle): +def amdsmi_get_gpu_id(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - id = ctypes.c_uint16() + gpu_id_16 = ctypes.c_uint16() _check_res(amdsmi_wrapper.amdsmi_get_gpu_id( - processor_handle, ctypes.byref(id))) + processor_handle, ctypes.byref(gpu_id_16))) - return id.value + return gpu_id_16.value -def amdsmi_get_gpu_vram_vendor(processor_handle: processor_handle): +def amdsmi_get_gpu_vram_vendor(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3142,22 +3190,22 @@ def amdsmi_get_gpu_vram_vendor(processor_handle: processor_handle): return vram_vendor.value.decode("utf-8") -def amdsmi_get_gpu_subsystem_id(processor_handle: processor_handle): +def amdsmi_get_gpu_subsystem_id(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - id = ctypes.c_uint16() + subsystem_id_16 = ctypes.c_uint16() _check_res( amdsmi_wrapper.amdsmi_get_gpu_subsystem_id( - processor_handle, ctypes.byref(id)) + processor_handle, ctypes.byref(subsystem_id_16)) ) - return _pad_hex_value(hex(id.value), 4) + return _pad_hex_value(hex(subsystem_id_16.value), 4) -def amdsmi_get_gpu_subsystem_name(processor_handle: processor_handle): +def amdsmi_get_gpu_subsystem_name(processor_handle: 
processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3190,7 +3238,7 @@ def amdsmi_get_lib_version(): def amdsmi_topo_get_numa_node_number( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3209,8 +3257,8 @@ def amdsmi_topo_get_numa_node_number( def amdsmi_topo_get_link_weight( - processor_handle_src: processor_handle, - processor_handle_dst: processor_handle, + processor_handle_src: processor_handle_t, + processor_handle_dst: processor_handle_t ): if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3234,8 +3282,8 @@ def amdsmi_topo_get_link_weight( def amdsmi_get_minmax_bandwidth_between_processors( - processor_handle_src: processor_handle, - processor_handle_dst: processor_handle, + processor_handle_src: processor_handle_t, + processor_handle_dst: processor_handle_t, ): if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3262,7 +3310,7 @@ def amdsmi_get_minmax_bandwidth_between_processors( return {"min_bandwidth": min_bandwidth.value, "max_bandwidth": max_bandwidth.value} -def amdsmi_get_link_metrics(processor_handle: processor_handle): +def amdsmi_get_link_metrics(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3294,8 +3342,8 @@ def amdsmi_get_link_metrics(processor_handle: processor_handle): def amdsmi_topo_get_link_type( - processor_handle_src: processor_handle, - processor_handle_dst: processor_handle, + processor_handle_src: processor_handle_t, + processor_handle_dst: processor_handle_t, ): if not 
isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3307,22 +3355,22 @@ def amdsmi_topo_get_link_type( processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle ) - hops = ctypes.c_uint64() - type = ctypes.c_uint32() + hops_64 = ctypes.c_uint64() + type_32 = ctypes.c_uint32() _check_res( amdsmi_wrapper.amdsmi_topo_get_link_type( processor_handle_src, processor_handle_dst, - ctypes.byref(hops), ctypes.byref(type) + ctypes.byref(hops_64), ctypes.byref(type_32) ) ) - return {"hops": hops.value, "type": type.value} + return {"hops": hops_64.value, "type": type_32.value} def amdsmi_topo_get_p2p_status( - processor_handle_src: processor_handle, - processor_handle_dst: processor_handle, + processor_handle_src: processor_handle_t, + processor_handle_dst: processor_handle_t, ): if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3334,12 +3382,12 @@ def amdsmi_topo_get_p2p_status( processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle ) - type = ctypes.c_uint32() + type_32 = ctypes.c_uint32() cap = amdsmi_wrapper.struct_amdsmi_p2p_capability_t() _check_res( amdsmi_wrapper.amdsmi_topo_get_p2p_status( - processor_handle_src, processor_handle_dst, ctypes.byref(type), ctypes.byref(cap) + processor_handle_src, processor_handle_dst, ctypes.byref(type_32), ctypes.byref(cap) ) ) @@ -3356,8 +3404,8 @@ def amdsmi_topo_get_p2p_status( def amdsmi_is_P2P_accessible( - processor_handle_src: processor_handle, - processor_handle_dst: processor_handle, + processor_handle_src: processor_handle_t, + processor_handle_dst: processor_handle_t, ): if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3380,7 +3428,7 @@ def amdsmi_is_P2P_accessible( return accessible.value -def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle): +def amdsmi_get_gpu_compute_partition(processor_handle: 
processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3400,7 +3448,7 @@ def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle): return compute_partition.value.decode("utf-8") -def amdsmi_set_gpu_compute_partition(processor_handle: processor_handle, +def amdsmi_set_gpu_compute_partition(processor_handle: processor_handle_t, compute_partition: AmdSmiComputePartitionType): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3417,7 +3465,7 @@ def amdsmi_set_gpu_compute_partition(processor_handle: processor_handle, ) ) -def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: processor_handle, +def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: processor_handle_t, profile_index: int): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3433,7 +3481,7 @@ def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: processor_han ) ) -def amdsmi_get_gpu_memory_partition(processor_handle: processor_handle): +def amdsmi_get_gpu_memory_partition(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3452,7 +3500,7 @@ def amdsmi_get_gpu_memory_partition(processor_handle: processor_handle): return memory_partition.value.decode("utf-8") -def amdsmi_get_gpu_memory_partition_config(processor_handle: processor_handle): +def amdsmi_get_gpu_memory_partition_config(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3475,8 +3523,8 @@ def amdsmi_get_gpu_memory_partition_config(processor_handle: 
processor_handle): if config.partition_caps.nps_flags.nps8_cap == 1: mem_caps_list.append("NPS8") if (config.partition_caps.nps_flags.nps1_cap == 0 and - config.partition_caps.nps_flags.nps2_cap == 0 and - config.partition_caps.nps_flags.nps4_cap == 0 and + config.partition_caps.nps_flags.nps2_cap == 0 and + config.partition_caps.nps_flags.nps4_cap == 0 and config.partition_caps.nps_flags.nps8_cap == 0): mem_caps_list.append("N/A") @@ -3490,7 +3538,7 @@ def amdsmi_get_gpu_memory_partition_config(processor_handle: processor_handle): return return_dict -def amdsmi_set_gpu_memory_partition(processor_handle: processor_handle, +def amdsmi_set_gpu_memory_partition(processor_handle: processor_handle_t, memory_partition: AmdSmiMemoryPartitionType): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3506,7 +3554,7 @@ def amdsmi_set_gpu_memory_partition(processor_handle: processor_handle, ) ) -def amdsmi_set_gpu_memory_partition_mode(processor_handle: processor_handle, +def amdsmi_set_gpu_memory_partition_mode(processor_handle: processor_handle_t, memory_partition: AmdSmiMemoryPartitionType): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3523,13 +3571,14 @@ def amdsmi_set_gpu_memory_partition_mode(processor_handle: processor_handle, ) def amdsmi_get_gpu_accelerator_partition_profile( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) exception_caught = False + return_dictionary = {} length = 8 partition_id = [0, 0, 0, 0, 0, 0, 0, 0] partition_id_list = (ctypes.c_uint32 * length)(*partition_id) @@ -3565,9 +3614,7 @@ def amdsmi_get_gpu_accelerator_partition_profile( _check_res(ret) # re-raise the exception if error is anything other than 
AMDSMI_STATUS_NOT_SUPPORTED # this ensures we can get partition ID even if the profile is not supported. finally: - if exception_caught: - return return_dictionary - else: + if not exception_caught: profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[profile.profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "") profile_type_ret = profile_type_ret.replace("INVALID", "N/A") length = profile.num_partitions @@ -3584,8 +3631,8 @@ def amdsmi_get_gpu_accelerator_partition_profile( if profile.memory_caps.nps_flags.nps8_cap == 1: mem_caps_list.append("NPS8") if (profile.memory_caps.nps_flags.nps1_cap == 0 and - profile.memory_caps.nps_flags.nps2_cap == 0 and - profile.memory_caps.nps_flags.nps4_cap == 0 and + profile.memory_caps.nps_flags.nps2_cap == 0 and + profile.memory_caps.nps_flags.nps4_cap == 0 and profile.memory_caps.nps_flags.nps8_cap == 0): mem_caps_list.append("N/A") partition_profile_dict = { @@ -3600,9 +3647,9 @@ def amdsmi_get_gpu_accelerator_partition_profile( "partition_id" : partition_ids, "partition_profile" : partition_profile_dict } - return return_dictionary + return return_dictionary -def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: processor_handle) -> Dict: +def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: processor_handle_t) -> Dict: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3614,14 +3661,13 @@ def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: proces ctypes.byref(config))) profiles = [] + resources = [] resource_idx = 0 for i in range(config.num_profiles): profile = config.profiles[i] profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[ config.profiles[i].profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "") profile_type_ret = profile_type_ret.replace("INVALID", "N/A") - resources = 
[] - mem_caps_list = [] if profile.memory_caps.nps_flags.nps1_cap == 1: @@ -3633,12 +3679,13 @@ def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: proces if profile.memory_caps.nps_flags.nps8_cap == 1: mem_caps_list.append("NPS8") if (profile.memory_caps.nps_flags.nps1_cap == 0 and - profile.memory_caps.nps_flags.nps2_cap == 0 and - profile.memory_caps.nps_flags.nps4_cap == 0 and + profile.memory_caps.nps_flags.nps2_cap == 0 and + profile.memory_caps.nps_flags.nps4_cap == 0 and profile.memory_caps.nps_flags.nps8_cap == 0): mem_caps_list.append("N/A") - for r in range(config.num_resource_profiles): + resources = [] + for _ in range(config.num_resource_profiles): res_profile = config.resource_profiles[resource_idx] resource_profiles_ret = amdsmi_wrapper.amdsmi_accelerator_partition_resource_type_t__enumvalues[ res_profile.resource_type].replace("AMDSMI_ACCELERATOR_", "") @@ -3671,7 +3718,7 @@ def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: proces return config_dict -def amdsmi_get_xgmi_info(processor_handle: processor_handle): +def amdsmi_get_xgmi_info(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3690,7 +3737,7 @@ def amdsmi_get_xgmi_info(processor_handle: processor_handle): def amdsmi_gpu_counter_group_supported( - processor_handle: processor_handle, + processor_handle: processor_handle_t, event_group: AmdSmiEventGroup, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3707,7 +3754,7 @@ def amdsmi_gpu_counter_group_supported( def amdsmi_gpu_create_counter( - processor_handle: processor_handle, + processor_handle: processor_handle_t, event_type: AmdSmiEventType, ) -> amdsmi_wrapper.amdsmi_event_handle_t: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3726,12 +3773,9 @@ def 
amdsmi_gpu_create_counter( return event_handle - -def amdsmi_gpu_destroy_counter(event_handle: amdsmi_wrapper.amdsmi_event_handle_t): +def amdsmi_gpu_destroy_counter(event_handle: amdsmi_wrapper.amdsmi_event_handle_t) -> None: if not isinstance(event_handle, amdsmi_wrapper.amdsmi_event_handle_t): - raise AmdSmiParameterException( - event_handle, amdsmi_wrapper.amdsmi_event_handle_t - ) + raise AmdSmiParameterException(event_handle, amdsmi_wrapper.amdsmi_event_handle_t) _check_res(amdsmi_wrapper.amdsmi_gpu_destroy_counter(event_handle)) @@ -3740,16 +3784,16 @@ def amdsmi_gpu_control_counter( counter_command: AmdSmiCounterCommand, ): if not isinstance(event_handle, amdsmi_wrapper.amdsmi_event_handle_t): - raise AmdSmiParameterException( - event_handle, amdsmi_wrapper.amdsmi_event_handle_t - ) + raise AmdSmiParameterException(event_handle, amdsmi_wrapper.amdsmi_event_handle_t) if not isinstance(counter_command, AmdSmiCounterCommand): raise AmdSmiParameterException(counter_command, AmdSmiCounterCommand) + + event_handle_value = amdsmi_wrapper.amdsmi_event_handle_t(event_handle.value) command_args = ctypes.c_void_p() _check_res( amdsmi_wrapper.amdsmi_gpu_control_counter( - event_handle, counter_command, command_args + event_handle_value, counter_command, command_args ) ) @@ -3758,12 +3802,9 @@ def amdsmi_gpu_read_counter( event_handle: amdsmi_wrapper.amdsmi_event_handle_t, ) -> Dict[str, Any]: if not isinstance(event_handle, amdsmi_wrapper.amdsmi_event_handle_t): - raise AmdSmiParameterException( - event_handle, amdsmi_wrapper.amdsmi_event_handle_t - ) + raise AmdSmiParameterException(event_handle, amdsmi_wrapper.amdsmi_event_handle_t) counter_value = amdsmi_wrapper.amdsmi_counter_value_t() - _check_res( amdsmi_wrapper.amdsmi_gpu_read_counter( event_handle, ctypes.byref(counter_value)) @@ -3777,7 +3818,7 @@ def amdsmi_gpu_read_counter( def amdsmi_get_gpu_available_counters( - processor_handle: processor_handle, + processor_handle: processor_handle_t, event_group: 
AmdSmiEventGroup, ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3798,7 +3839,7 @@ def amdsmi_get_gpu_available_counters( def amdsmi_set_gpu_perf_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, perf_level: AmdSmiDevPerfLevel, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3812,7 +3853,7 @@ def amdsmi_set_gpu_perf_level( processor_handle, perf_level)) -def amdsmi_reset_gpu(processor_handle: processor_handle): +def amdsmi_reset_gpu(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3825,7 +3866,7 @@ def amdsmi_gpu_driver_reload(): def amdsmi_set_gpu_fan_speed( - processor_handle: processor_handle, sensor_idx: int, fan_speed: int + processor_handle: processor_handle_t, sensor_idx: int, fan_speed: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3835,17 +3876,17 @@ def amdsmi_set_gpu_fan_speed( raise AmdSmiParameterException(sensor_idx, int) if not isinstance(fan_speed, int): raise AmdSmiParameterException(fan_speed, int) - sensor_idx = ctypes.c_uint32(sensor_idx) - fan_speed = ctypes.c_uint64(fan_speed) + sensor_idx_32 = ctypes.c_uint32(sensor_idx) + fan_speed_64 = ctypes.c_uint64(fan_speed) _check_res( amdsmi_wrapper.amdsmi_set_gpu_fan_speed( - processor_handle, sensor_idx, fan_speed) + processor_handle, sensor_idx_32, fan_speed_64) ) def amdsmi_reset_gpu_fan( - processor_handle: processor_handle, sensor_idx: int + processor_handle: processor_handle_t, sensor_idx: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3853,13 +3894,13 @@ def amdsmi_reset_gpu_fan( ) if not isinstance(sensor_idx, int): raise AmdSmiParameterException(sensor_idx, int) - sensor_idx = 
ctypes.c_uint32(sensor_idx) + sensor_idx_32 = ctypes.c_uint32(sensor_idx) - _check_res(amdsmi_wrapper.amdsmi_reset_gpu_fan(processor_handle, sensor_idx)) + _check_res(amdsmi_wrapper.amdsmi_reset_gpu_fan(processor_handle, sensor_idx_32)) def amdsmi_set_clk_freq( - processor_handle: processor_handle, + processor_handle: processor_handle_t, clk_type: str, freq_bitmask: int, ): @@ -3867,6 +3908,11 @@ def amdsmi_set_clk_freq( raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) + if not isinstance(clk_type, str): + raise AmdSmiParameterException(clk_type, str) + if not isinstance(freq_bitmask, int): + raise AmdSmiParameterException(freq_bitmask, int) + if clk_type.lower() == "sclk": clk_type_conversion = AmdSmiClkType.SYS elif clk_type.lower() == "mclk": @@ -3879,18 +3925,17 @@ def amdsmi_set_clk_freq( clk_type_conversion = "N/A" if not isinstance(clk_type_conversion, AmdSmiClkType): raise AmdSmiParameterException(clk_type_conversion, AmdSmiClkType) - if not isinstance(freq_bitmask, int): - raise AmdSmiParameterException(freq_bitmask, int) - freq_bitmask = ctypes.c_uint64(freq_bitmask) + + freq_bitmask_64 = ctypes.c_uint64(freq_bitmask) _check_res( amdsmi_wrapper.amdsmi_set_clk_freq( - processor_handle, clk_type_conversion, freq_bitmask + processor_handle, clk_type_conversion, freq_bitmask_64 ) ) def amdsmi_set_soc_pstate( - processor_handle: processor_handle, + processor_handle: processor_handle_t, policy_id: int, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3905,7 +3950,7 @@ def amdsmi_set_soc_pstate( def amdsmi_set_xgmi_plpd( - processor_handle: processor_handle, + processor_handle: processor_handle_t, policy_id: int, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3917,10 +3962,11 @@ def amdsmi_set_xgmi_plpd( processor_handle, policy_id ) ) + return def amdsmi_set_gpu_process_isolation( - processor_handle: processor_handle, + processor_handle: 
processor_handle_t, pisolate: int, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -3935,7 +3981,7 @@ def amdsmi_set_gpu_process_isolation( def amdsmi_clean_gpu_local_data( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3949,7 +3995,7 @@ def amdsmi_clean_gpu_local_data( def amdsmi_set_gpu_overdrive_level( - processor_handle: processor_handle, overdrive_value: int + processor_handle: processor_handle_t, overdrive_value: int ): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -3957,15 +4003,15 @@ def amdsmi_set_gpu_overdrive_level( ) if not isinstance(overdrive_value, int): raise AmdSmiParameterException(overdrive_value, int) - overdrive_value = ctypes.c_uint32(overdrive_value) + overdrive_value_32 = ctypes.c_uint32(overdrive_value) _check_res( amdsmi_wrapper.amdsmi_set_gpu_overdrive_level( - processor_handle, overdrive_value) + processor_handle, overdrive_value_32) ) -def amdsmi_get_gpu_bdf_id(processor_handle: processor_handle): +def amdsmi_get_gpu_bdf_id(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -3981,7 +4027,7 @@ def amdsmi_get_gpu_bdf_id(processor_handle: processor_handle): def amdsmi_set_gpu_pci_bandwidth( - processor_handle: processor_handle, bitmask: int + processor_handle: processor_handle_t, bitmask: int ) -> None: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4006,7 +4052,7 @@ def _format_transfer_rate(transfer_rate): } -def amdsmi_get_gpu_pci_bandwidth(processor_handle: processor_handle): +def amdsmi_get_gpu_pci_bandwidth(processor_handle: processor_handle_t): if not 
isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4027,7 +4073,7 @@ def amdsmi_get_gpu_pci_bandwidth(processor_handle: processor_handle): } -def amdsmi_get_gpu_pci_throughput(processor_handle: processor_handle): +def amdsmi_get_gpu_pci_throughput(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4049,7 +4095,7 @@ def amdsmi_get_gpu_pci_throughput(processor_handle: processor_handle): } -def amdsmi_get_gpu_pci_replay_counter(processor_handle: processor_handle): +def amdsmi_get_gpu_pci_replay_counter(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4065,7 +4111,7 @@ def amdsmi_get_gpu_pci_replay_counter(processor_handle: processor_handle): return counter.value -def amdsmi_get_gpu_topo_numa_affinity(processor_handle: processor_handle): +def amdsmi_get_gpu_topo_numa_affinity(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4082,7 +4128,7 @@ def amdsmi_get_gpu_topo_numa_affinity(processor_handle: processor_handle): def amdsmi_set_power_cap( - processor_handle: processor_handle, sensor_ind: int, cap: int + processor_handle: processor_handle_t, sensor_ind: int, cap: int ) -> None: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4103,7 +4149,7 @@ def amdsmi_set_power_cap( def amdsmi_set_gpu_power_profile( - processor_handle: processor_handle, + processor_handle: processor_handle_t, reserved: int, profile: 
AmdSmiPowerProfilePresetMasks, ) -> None: @@ -4125,7 +4171,7 @@ def amdsmi_set_gpu_power_profile( ) -def amdsmi_get_energy_count(processor_handle: processor_handle): +def amdsmi_get_energy_count(processor_handle: processor_handle_t): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4148,7 +4194,7 @@ def amdsmi_get_energy_count(processor_handle: processor_handle): def amdsmi_set_gpu_clk_range( - processor_handle: processor_handle, + processor_handle: processor_handle_t, min_clk_value: int, max_clk_value: int, clk_type: AmdSmiClkType, @@ -4178,7 +4224,7 @@ def amdsmi_set_gpu_clk_range( def amdsmi_set_gpu_clk_limit( - processor_handle: processor_handle, + processor_handle: processor_handle_t, clk_type: str, limit_type: str, value: int @@ -4200,14 +4246,14 @@ def amdsmi_set_gpu_clk_limit( _check_res( amdsmi_wrapper.amdsmi_set_gpu_clk_limit( processor_handle, - amdsmi_wrapper.amdsmi_clk_type_t(clk_type_conversion), - amdsmi_wrapper.amdsmi_clk_limit_type_t(limit_type_conversion), + clk_type_conversion, + limit_type_conversion, ctypes.c_uint64(value), ) ) -def amdsmi_get_gpu_memory_total(processor_handle: processor_handle, mem_type: AmdSmiMemoryType): +def amdsmi_get_gpu_memory_total(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4229,7 +4275,7 @@ def amdsmi_get_gpu_memory_total(processor_handle: processor_handle, mem_type: Am def amdsmi_set_gpu_od_clk_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, level: AmdSmiFreqInd, value: int, clk_type: AmdSmiClkType, @@ -4255,7 +4301,7 @@ def amdsmi_set_gpu_od_clk_info( ) -def amdsmi_get_gpu_memory_usage(processor_handle: processor_handle, mem_type: AmdSmiMemoryType): +def 
amdsmi_get_gpu_memory_usage(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType): if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle @@ -4277,7 +4323,7 @@ def amdsmi_get_gpu_memory_usage(processor_handle: processor_handle, mem_type: Am def amdsmi_set_gpu_od_volt_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, vpoint: int, clk_value: int, volt_value: int, @@ -4307,7 +4353,7 @@ def amdsmi_set_gpu_od_volt_info( def amdsmi_get_gpu_fan_rpms( - processor_handle: processor_handle, sensor_idx: int + processor_handle: processor_handle_t, sensor_idx: int ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4326,7 +4372,7 @@ def amdsmi_get_gpu_fan_rpms( def amdsmi_get_gpu_fan_speed( - processor_handle: processor_handle, sensor_idx: int + processor_handle: processor_handle_t, sensor_idx: int ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4345,7 +4391,7 @@ def amdsmi_get_gpu_fan_speed( def amdsmi_get_gpu_fan_speed_max( - processor_handle: processor_handle, sensor_idx: int + processor_handle: processor_handle_t, sensor_idx: int ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4364,7 +4410,7 @@ def amdsmi_get_gpu_fan_speed_max( def amdsmi_get_temp_metric( - processor_handle: processor_handle, + processor_handle: processor_handle_t, sensor_type: AmdSmiTemperatureType, metric: AmdSmiTemperatureMetric, ) -> int: @@ -4388,7 +4434,7 @@ def amdsmi_get_temp_metric( def amdsmi_get_gpu_volt_metric( - processor_handle: processor_handle, + processor_handle: processor_handle_t, sensor_type: AmdSmiVoltageType, metric: AmdSmiVoltageMetric, ) -> int: @@ -4412,7 +4458,7 @@ def amdsmi_get_gpu_volt_metric( def 
amdsmi_get_utilization_count( - processor_handle: processor_handle, + processor_handle: processor_handle_t, counter_types: List[AmdSmiUtilizationCounterType] ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -4450,7 +4496,8 @@ def amdsmi_get_utilization_count( if count.value != len(counters): raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_API_FAILED) - result = [{"timestamp": timestamp.value}] + result = [] + result.append({"timestamp": timestamp.value}) for index in range(count.value): counter_type = amdsmi_wrapper.amdsmi_utilization_counter_type_t__enumvalues[ util_counter_list[index].type @@ -4466,7 +4513,7 @@ def amdsmi_get_utilization_count( def amdsmi_get_gpu_perf_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> str: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4490,7 +4537,7 @@ def amdsmi_get_gpu_perf_level( def amdsmi_set_gpu_perf_determinism_mode( - processor_handle: processor_handle, clkvalue: int + processor_handle: processor_handle_t, clkvalue: int ) -> None: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4504,7 +4551,7 @@ def amdsmi_set_gpu_perf_determinism_mode( def amdsmi_get_gpu_overdrive_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4522,7 +4569,7 @@ def amdsmi_get_gpu_overdrive_level( def amdsmi_get_gpu_mem_overdrive_level( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4540,7 +4587,7 @@ def amdsmi_get_gpu_mem_overdrive_level( def amdsmi_get_clk_freq( - processor_handle: processor_handle, clk_type: 
AmdSmiClkType + processor_handle: processor_handle_t, clk_type: AmdSmiClkType ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4565,7 +4612,7 @@ def amdsmi_get_clk_freq( def amdsmi_get_soc_pstate( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4581,10 +4628,10 @@ def amdsmi_get_soc_pstate( polices = [] for i in range(0, policy.num_supported): - id = policy.policies[i].policy_id + policy_id = policy.policies[i].policy_id desc = policy.policies[i].policy_description polices.append({ - 'policy_id' : id, + 'policy_id' : policy_id, 'policy_description': desc.decode() }) current_id = policy.policies[policy.current].policy_id @@ -4597,7 +4644,7 @@ def amdsmi_get_soc_pstate( def amdsmi_get_xgmi_plpd( - processor_handle: processor_handle, + processor_handle: processor_handle_t ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4606,30 +4653,50 @@ def amdsmi_get_xgmi_plpd( policy = amdsmi_wrapper.amdsmi_dpm_policy_t() _check_res( - amdsmi_wrapper.amdsmi_get_xgmi_plpd( - processor_handle, ctypes.byref(policy) - ) + amdsmi_wrapper.amdsmi_get_xgmi_plpd(processor_handle, ctypes.byref(policy)) ) - polices = [] - for i in range(0, policy.num_supported): - id = policy.policies[i].policy_id - desc = policy.policies[i].policy_description - polices.append({ - 'policy_id' : id, - 'policy_description': desc.decode() - }) - current_id = policy.policies[policy.current].policy_id + policies = [] + for i in range(policy.num_supported): + try: + # Access the policy entry directly + policy_entry = policy.policies[i] + policy_id = policy_entry.policy_id - return { + # Handle the policy description more carefully + policy_desc_bytes = policy_entry.policy_description + 
if policy_desc_bytes: + # Convert ctypes array to bytes and decode + policy_desc = ctypes.string_at(policy_desc_bytes).decode('utf-8').rstrip('\x00') + else: + policy_desc = "" + + policies.append({ + 'policy_id': policy_id, + 'policy_description': policy_desc + }) + except (UnicodeDecodeError, AttributeError, ValueError): + # Fallback for problematic entries + policies.append({ + 'policy_id': 0, # Default fallback + 'policy_description': "" + }) + + # Get current policy ID correctly + if policy.current < policy.num_supported: + current_id = policy.policies[policy.current].policy_id + else: + current_id = 0 # Fallback + + return { "num_supported": policy.num_supported, "current_id": current_id, - "plpds": polices, + "plpds": policies, # Marked for deprecation + "policies": policies, # Correct field name } - def amdsmi_get_gpu_process_isolation( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> int: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4647,7 +4714,7 @@ def amdsmi_get_gpu_process_isolation( def amdsmi_get_gpu_od_volt_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4678,27 +4745,40 @@ def amdsmi_get_gpu_od_volt_info( return { "curr_sclk_range": { "lower_bound": sclk_lower, - "upper_bound": sclk_upper, + "upper_bound": sclk_upper }, "curr_mclk_range": { "lower_bound": mclk_lower, - "upper_bound": mclk_upper, + "upper_bound": mclk_upper }, "sclk_freq_limits": { "lower_bound": freq_data.sclk_freq_limits.lower_bound, - "upper_bound": freq_data.sclk_freq_limits.upper_bound, + "upper_bound": freq_data.sclk_freq_limits.upper_bound }, "mclk_freq_limits": { "lower_bound": freq_data.mclk_freq_limits.lower_bound, - "upper_bound": freq_data.mclk_freq_limits.upper_bound, + "upper_bound": 
freq_data.mclk_freq_limits.upper_bound }, - "curve.vc_points": list(freq_data.curve.vc_points), - "num_regions": freq_data.num_regions, + "curve.vc_points": [ + { + "frequency": freq_data.curve.vc_points[0].frequency, + "voltage": freq_data.curve.vc_points[0].voltage + }, + { + "frequency": freq_data.curve.vc_points[1].frequency, + "voltage": freq_data.curve.vc_points[1].voltage + }, + { + "frequency": freq_data.curve.vc_points[2].frequency, + "voltage": freq_data.curve.vc_points[2].voltage + } + ], + "num_regions": freq_data.num_regions } def amdsmi_get_gpu_metrics_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4858,7 +4938,7 @@ def amdsmi_get_gpu_metrics_info( def amdsmi_get_gpu_od_volt_curve_regions( - processor_handle: processor_handle, num_regions: int + processor_handle: processor_handle_t, num_regions: int ) -> List[Dict[str, Any]]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4897,7 +4977,7 @@ def amdsmi_get_gpu_od_volt_curve_regions( def amdsmi_get_gpu_power_profile_presets( - processor_handle: processor_handle, sensor_idx: int + processor_handle: processor_handle_t, sensor_idx: int ) -> Dict[str, Any]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4921,7 +5001,7 @@ def amdsmi_get_gpu_power_profile_presets( def amdsmi_get_gpu_ecc_count( - processor_handle: processor_handle, block: AmdSmiGpuBlock + processor_handle: processor_handle_t, block: AmdSmiGpuBlock ) -> Dict[str, int]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4945,7 +5025,7 @@ def amdsmi_get_gpu_ecc_count( def amdsmi_get_gpu_ecc_enabled( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> int: if 
not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4962,7 +5042,7 @@ def amdsmi_get_gpu_ecc_enabled( def amdsmi_get_gpu_ecc_status( - processor_handle: processor_handle, block: AmdSmiGpuBlock + processor_handle: processor_handle_t, block: AmdSmiGpuBlock ) -> AmdSmiRasErrState: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -4986,7 +5066,7 @@ def amdsmi_status_code_to_string(status: amdsmi_wrapper.amdsmi_status_t) -> Unio if not isinstance(status, amdsmi_wrapper.amdsmi_status_t): raise AmdSmiParameterException(status, amdsmi_wrapper.amdsmi_status_t) - status_string_p_p = POINTER(POINTER(ctypes.c_char())) + status_string_p_p = ctypes.pointer(ctypes.pointer(ctypes.c_char())) _check_res(amdsmi_wrapper.amdsmi_status_code_to_string( status, status_string_p_p)) @@ -5061,7 +5141,7 @@ def amdsmi_get_gpu_compute_process_gpus(pid: int) -> List[int]: def amdsmi_gpu_xgmi_error_status( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> AmdSmiXgmiStatus: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -5074,11 +5154,12 @@ def amdsmi_gpu_xgmi_error_status( processor_handle, ctypes.byref(status)) ) - return AmdSmiXgmiStatus(status.value).value + #return AmdSmiXgmiStatus(status.value).value + return AmdSmiXgmiStatus(status.value) def amdsmi_reset_gpu_xgmi_error( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> None: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -5089,7 +5170,7 @@ def amdsmi_reset_gpu_xgmi_error( def amdsmi_get_gpu_memory_reserved_pages( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Union[list, str]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ 
-5107,7 +5188,7 @@ def amdsmi_get_gpu_memory_reserved_pages( if num_pages.value == 0: return [] - mem_reserved_pages = (amdsmi_wrapper.amdsmi_retired_page_record_t * num_pages)() + mem_reserved_pages = (amdsmi_wrapper.amdsmi_retired_page_record_t * num_pages.value)() _check_res( amdsmi_wrapper.amdsmi_get_gpu_memory_reserved_pages( processor_handle, ctypes.byref(num_pages), mem_reserved_pages @@ -5118,7 +5199,7 @@ def amdsmi_get_gpu_memory_reserved_pages( def amdsmi_get_gpu_metrics_header_info( - processor_handle: processor_handle, + processor_handle: processor_handle_t, ) -> Dict[str, int]: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): raise AmdSmiParameterException( @@ -5140,7 +5221,7 @@ def amdsmi_get_gpu_metrics_header_info( def amdsmi_get_link_topology_nearest( - processor_handle: processor_handle, + processor_handle: processor_handle_t, link_type: AmdSmiLinkType, )-> Dict[str, Any]: @@ -5163,7 +5244,7 @@ def amdsmi_get_link_topology_nearest( def amdsmi_get_gpu_virtualization_mode( - processor_handle: processor_handle + processor_handle: processor_handle_t ) -> Dict[str, AmdSmiVirtualizationMode]: # make info struct here @@ -5249,8 +5330,7 @@ def amdsmi_get_rocm_version()-> Tuple[bool, str]: return True, f"{major.value}.{minor.value}.{patch.value}" else: return False, "Failed to unpack ROCm version" - except OSError as e: - err = e + except OSError: continue # If we hit here, we were unable to find the librocm-core.so file @@ -5259,7 +5339,78 @@ def amdsmi_get_rocm_version()-> Tuple[bool, str]: return False, f"Unable to detect ROCm installation, Unknown Error: {e}" -def amdsmi_get_gpu_revision(processor_handle: processor_handle) -> str: +def amdsmi_get_cpu_handles() -> Dict[str, Any]: + cpu_handles = amdsmi_get_cpusocket_handles() + return { 'cpu_count': len(cpu_handles), 'processor_handles': cpu_handles } + + +def amdsmi_get_esmi_err_msg(status: AmdSmiStatus) -> str: + if not isinstance(status, AmdSmiStatus): + raise 
AmdSmiParameterException(status, AmdSmiStatus) + + # Create a pointer to a pointer to char (char**) + status_string_p_p = ctypes.pointer(ctypes.pointer(ctypes.c_char())) + + _check_res( + amdsmi_wrapper.amdsmi_get_esmi_err_msg( + status.value, + status_string_p_p + ) + ) + + # Use string_cast helper function if available in wrapper + if not status_string_p_p.contents: + pass + elif hasattr(amdsmi_wrapper, 'string_cast'): + error_msg = amdsmi_wrapper.string_cast(status_string_p_p.contents) + if isinstance(error_msg, str): + return error_msg + else: + # Manual string extraction + error_msg = ctypes.string_at(status_string_p_p.contents).decode('utf-8') + return error_msg + return "Unknown error" + + +def amdsmi_get_gpu_event_notification( + timeout_ms: int = 1000 +) -> Dict[str, Any]: + if not isinstance(timeout_ms, int): + raise AmdSmiParameterException(timeout_ms, int) + + # Convert timeout to C type + timeout_ms_c = ctypes.c_int32(timeout_ms) + + # Initialize output parameters + num_elem = ctypes.c_uint32(MAX_NUM_PROCESSES) + num_elem_p = ctypes.pointer(num_elem) + + # Create array for event notification data + data_array = (amdsmi_wrapper.amdsmi_evt_notification_data_t * MAX_NUM_PROCESSES)() + + _check_res( + amdsmi_wrapper.amdsmi_get_gpu_event_notification( + timeout_ms_c, num_elem_p, data_array + ) + ) + + results = [] + for i in range(num_elem_p.contents.value): + entry = { + 'processor_handle': data_array[i].processor_handle, + 'event': data_array[i].event, + 'message': data_array[i].message.decode('utf-8') if data_array[i].message else "" + } + results.append(entry) + + result = { + 'num_elem': num_elem_p.contents.value, + 'data': results + } + return result + + +def amdsmi_get_gpu_revision(processor_handle: processor_handle_t) -> str: """ Get the GPU revision for a given processor handle. 
@@ -5277,12 +5428,112 @@ def amdsmi_get_gpu_revision(processor_handle: processor_handle) -> str: raise AmdSmiParameterException( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) + revision_16 = ctypes.c_uint16() + _check_res(amdsmi_wrapper.amdsmi_get_gpu_revision(processor_handle, ctypes.byref(revision_16))) + + return _pad_hex_value(hex(revision_16.value), 2) + + +def amdsmi_get_processor_count_from_handles(processor_handles_list) -> Dict[str, int]: + if not isinstance(processor_handles_list, list): + raise AmdSmiParameterException(processor_handles_list, list) + + # Convert Python list to C array + processor_count = len(processor_handles_list) + processor_handles_array = (amdsmi_wrapper.amdsmi_processor_handle * processor_count)() + + for i, handle in enumerate(processor_handles_list): + processor_handles_array[i] = handle + + processor_count_p = ctypes.pointer(ctypes.c_uint32(processor_count)) + nr_cpusockets = ctypes.pointer(ctypes.c_uint32(0)) + nr_cpucores = ctypes.pointer(ctypes.c_uint32(0)) + nr_gpus = ctypes.pointer(ctypes.c_uint32(0)) - revision = ctypes.c_uint16() _check_res( - amdsmi_wrapper.amdsmi_get_gpu_revision( - processor_handle, ctypes.byref(revision) + amdsmi_wrapper.amdsmi_get_processor_count_from_handles( + processor_handles_array, processor_count_p, nr_cpusockets, nr_cpucores, nr_gpus ) ) - return _pad_hex_value(hex(revision.value), 2) + return { + 'nr_cpusockets': nr_cpusockets.contents.value, + 'nr_cpucores': nr_cpucores.contents.value, + 'nr_gpus': nr_gpus.contents.value + } + + +def amdsmi_get_processor_handles_by_type(socket_handle: socket_handle_t, processor_type: AmdSmiProcessorType): + if not isinstance(socket_handle, amdsmi_wrapper.amdsmi_socket_handle): + raise AmdSmiParameterException(socket_handle, amdsmi_wrapper.amdsmi_socket_handle) + if not isinstance(processor_type, AmdSmiProcessorType): + raise AmdSmiParameterException(processor_type, AmdSmiProcessorType) + + processor_handles = 
(amdsmi_wrapper.amdsmi_processor_handle * MAX_NUM_PROCESSES)() + processor_count = ctypes.c_uint32(0) + ptr_processor_count = ctypes.pointer(processor_count) + + _check_res( + amdsmi_wrapper.amdsmi_get_processor_handles_by_type( + socket_handle, processor_type, processor_handles, ptr_processor_count + ) + ) + + entry = [] + for i in range(ptr_processor_count.contents.value): + entry.append(processor_handles[i]) + return { + 'processor_handles': entry, + 'processor_count': ptr_processor_count.contents.value + } + + +def amdsmi_gpu_validate_ras_eeprom(processor_handle: processor_handle_t): + if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle, amdsmi_wrapper.amdsmi_processor_handle + ) + + _check_res(amdsmi_wrapper.amdsmi_gpu_validate_ras_eeprom(processor_handle)) + + +def amdsmi_init_gpu_event_notification(processor_handle: processor_handle_t): + if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle, amdsmi_wrapper.amdsmi_processor_handle + ) + + _check_res(amdsmi_wrapper.amdsmi_init_gpu_event_notification(processor_handle)) + + +def amdsmi_set_gpu_event_notification_mask(processor_handle: processor_handle_t, mask: int): + if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle, amdsmi_wrapper.amdsmi_processor_handle + ) + + if not isinstance(mask, int): + raise AmdSmiParameterException(mask, int) + + mask_64 = ctypes.c_uint64(mask) + + _check_res(amdsmi_wrapper.amdsmi_set_gpu_event_notification_mask(processor_handle, mask_64)) + + +def amdsmi_stop_gpu_event_notification( + processor_handle: processor_handle_t +): + if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle, amdsmi_wrapper.amdsmi_processor_handle + ) + 
_check_res(amdsmi_wrapper.amdsmi_stop_gpu_event_notification(processor_handle)) + + +def amdsmi_get_gpu_busy_percent(processor_handle: processor_handle_t): + if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException(processor_handle, amdsmi_wrapper.amdsmi_processor_handle) + + gpu_busy_percent = ctypes.c_uint32(0) + _check_res(amdsmi_wrapper.amdsmi_get_gpu_busy_percent(processor_handle, ctypes.byref(gpu_busy_percent))) + return gpu_busy_percent.value diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 5510bb55ac..89f053a5e7 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -63,7 +63,6 @@ #include "rocm_smi/rocm_smi_utils.h" #include "rocm_smi/rocm_smi_kfd.h" - // a global instance of std::mutex to protect data passed during threads std::mutex myMutex; @@ -495,8 +494,7 @@ amdsmi_status_t amdsmi_get_processor_count_from_handles(amdsmi_processor_handle* } for (uint32_t i = 0; i < *processor_count; i++) { - amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i], - &processor_type); + amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i], &processor_type); if (r != AMDSMI_STATUS_SUCCESS) return r; if(processor_type == AMDSMI_PROCESSOR_TYPE_AMD_CPU) { @@ -546,7 +544,7 @@ amdsmi_status_t amdsmi_get_processor_handles_by_type(amdsmi_socket_handle socket #endif -amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle , +amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle, processor_type_t* processor_type) { AMDSMI_CHECK_INIT(); diff --git a/tests/python_unittest/unit_tests.py b/tests/python_unittest/unit_tests.py index 887ed35310..a502b100a7 100755 --- a/tests/python_unittest/unit_tests.py +++ b/tests/python_unittest/unit_tests.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
+import ctypes import inspect import json import sys @@ -28,13 +29,15 @@ sys.path.append("/opt/rocm/libexec/amdsmi_cli/") try: import amdsmi -except ImportError: - raise ImportError("Could not import /opt/rocm/libexec/amdsmi_cli/amdsmi_cli.py") +except ImportError as exc: + raise ImportError("Could not import /opt/rocm/libexec/amdsmi_cli/amdsmi_cli.py") from exc - -has_info_printed = False -not_supported_error_codes = ['2', '3', '49'] -not_supported_error_code_names = ['AMDSMI_STATUS_NOT_SUPPORTED', 'AMDSMI_STATUS_NOT_YET_IMPLEMENTED', 'AMDSMI_STATUS_NO_HSMP_MSG_SUP'] +not_supported_error_codes = \ +[ + ( '2', 'AMDSMI_STATUS_NOT_SUPPORTED'), + ( '3', 'AMDSMI_STATUS_NOT_YET_IMPLEMENTED'), + ('49', 'AMDSMI_STATUS_NO_HSMP_MSG_SUP') +] error_map = \ { @@ -85,41 +88,15 @@ error_map = \ '0xFFFFFFFF': 'AMDSMI_STATUS_UNKNOWN_ERROR' } +verbose=1 +if '-q' in sys.argv or '--quiet' in sys.argv: + verbose=0 +elif '-v' in sys.argv or '--verbose' in sys.argv: + verbose=2 +has_info_printed = False + class TestAmdSmiPythonBDF(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.verbose = verbose - self.max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES - global has_info_printed - if self.verbose and has_info_printed is False: - # Execute the following to print the asic and board info once per test run - has_info_printed = True - self.setUp() - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), self.max_num_physical_devices) - for i in range(0, len(processors)): - try: - # Print asic info - msg = f'asic info(gpu={i})' - ret = amdsmi.amdsmi_get_gpu_asic_info(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - raise e - for i in range(0, len(processors)): - try: - # Print board info - msg = f'board info(gpu={i})' - ret = 
amdsmi.amdsmi_get_gpu_board_info(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - raise e - self.tearDown() - return - valid_bdfs = { "00:00.0": [0, 0, 0, 0], "01:01.1": [0, 1, 1, 1], @@ -202,24 +179,160 @@ class TestAmdSmiPythonBDF(unittest.TestCase): "0000:00:00.0 ": None, } + def test_parse_bdf(self): + # go through all bdfs + expectations = self.valid_bdfs.copy() + expectations.update(self.invalid_bdfs) + for bdf in expectations: + expected = expectations[bdf] + result = amdsmi.amdsmi_interface._parse_bdf(bdf) + self.assertEqual(result, expected, + "Expected {} for bdf {}, but got {}".format( + expected, bdf, result)) + + @classmethod + def _convert_bdf_to_long(cls, bdf): + if len(bdf) == 12: + return bdf + if len(bdf) == 7: + return "0000:" + bdf + return None + + def test_format_bdf(self): + # go through valid bdfs + expectations = self.valid_bdfs.copy() + for bdf_string in expectations: + # use key as result and value as input + bdf_list = expectations[bdf_string] + smi_bdf = amdsmi.amdsmi_interface._make_amdsmi_bdf_from_list(bdf_list) + expected = TestAmdSmiPythonBDF._convert_bdf_to_long(bdf_string) + if expected: + expected = expected.lower() + if smi_bdf: + result = amdsmi.amdsmi_interface._format_bdf(smi_bdf) + else: + result = "None" + self.assertEqual(result, expected, + "Expected {} for bdf {}, but got {}".format( + expected, bdf_string, result)) + + def test_check_res(self): + # expect retry error to raise SmiRetryException + with self.assertRaises(amdsmi.AmdSmiRetryException) as retry_test: + amdsmi.amdsmi_interface._check_res( + (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_RETRY)()) + # except retry error to have AMDSMI_STATUS_RETRY error code + self.assertEqual(retry_test.exception.get_error_code(), + amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_RETRY) + + # expect timeout error to raise SmiTimeoutException + with self.assertRaises(amdsmi.AmdSmiTimeoutException) as timeout_test: + 
amdsmi.amdsmi_interface._check_res( + (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT)()) + # except timeout error to have AMDSMI_STATUS_RETRY error code + self.assertEqual(timeout_test.exception.get_error_code(), + amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT) + + # expect invalid args error to raise AmdSmiLibraryException + with self.assertRaises(amdsmi.AmdSmiLibraryException) as inval_test: + amdsmi.amdsmi_interface._check_res( + (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL)()) + # expect invalid args error to have AMDSMI_STATUS_INVAL error code + self.assertEqual(inval_test.exception.get_error_code(), + amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL) + +class TestAmdSmiPython(unittest.TestCase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + global has_info_printed + if verbose and has_info_printed is False: + # Execute the following to print the asic and board info once + # per test run + has_info_printed = True + self.setUp() + for i, gpu in enumerate(self.processors): + try: + # Print asic info + msg = f'asic info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_asic_info(gpu) + self._print(msg, ret) + # Print board info + msg = f'board info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_board_info(gpu) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + raise e + self.tearDown() + PASS = 'AMDSMI_STATUS_SUCCESS' FAIL = 'AMDSMI_STATUS_INVAL' + max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES # Tests marked wtih either of these flags will be skipped # and need to be implemented later. 
- TODO_SKIP_NYI = True TODO_SKIP_FAIL = True TODO_SKIP_NOT_COMPLETE = True + status_types = \ + [ + ('SUCCESS', amdsmi.AmdSmiStatus.SUCCESS, PASS), + ('INVAL', amdsmi.AmdSmiStatus.INVAL, PASS), + ('NOT_SUPPORTED', amdsmi.AmdSmiStatus.NOT_SUPPORTED, PASS), + ('NOT_YET_IMPLEMENTED', amdsmi.AmdSmiStatus.NOT_YET_IMPLEMENTED, PASS), + ('FAIL_LOAD_MODULE', amdsmi.AmdSmiStatus.FAIL_LOAD_MODULE, PASS), + ('FAIL_LOAD_SYMBOL', amdsmi.AmdSmiStatus.FAIL_LOAD_SYMBOL, PASS), + ('DRM_ERROR', amdsmi.AmdSmiStatus.DRM_ERROR, PASS), + ('API_FAILED', amdsmi.AmdSmiStatus.API_FAILED, PASS), + ('TIMEOUT', amdsmi.AmdSmiStatus.TIMEOUT, PASS), + ('RETRY', amdsmi.AmdSmiStatus.RETRY, PASS), + ('NO_PERM', amdsmi.AmdSmiStatus.NO_PERM, PASS), + ('INTERRUPT', amdsmi.AmdSmiStatus.INTERRUPT, PASS), + ('IO', amdsmi.AmdSmiStatus.IO, PASS), + ('ADDRESS_FAULT', amdsmi.AmdSmiStatus.ADDRESS_FAULT, PASS), + ('FILE_ERROR', amdsmi.AmdSmiStatus.FILE_ERROR, PASS), + ('OUT_OF_RESOURCES', amdsmi.AmdSmiStatus.OUT_OF_RESOURCES, PASS), + ('INTERNAL_EXCEPTION', amdsmi.AmdSmiStatus.INTERNAL_EXCEPTION, PASS), + ('INPUT_OUT_OF_BOUNDS', amdsmi.AmdSmiStatus.INPUT_OUT_OF_BOUNDS, PASS), + ('INIT_ERROR', amdsmi.AmdSmiStatus.INIT_ERROR, PASS), + ('REFCOUNT_OVERFLOW', amdsmi.AmdSmiStatus.REFCOUNT_OVERFLOW, PASS), + ('DIRECTORY_NOT_FOUND', amdsmi.AmdSmiStatus.DIRECTORY_NOT_FOUND, PASS), + ('BUSY', amdsmi.AmdSmiStatus.BUSY, PASS), + ('NOT_FOUND', amdsmi.AmdSmiStatus.NOT_FOUND, PASS), + ('NOT_INIT', amdsmi.AmdSmiStatus.NOT_INIT, PASS), + ('NO_SLOT', amdsmi.AmdSmiStatus.NO_SLOT, PASS), + ('DRIVER_NOT_LOADED', amdsmi.AmdSmiStatus.DRIVER_NOT_LOADED, PASS), + ('MORE_DATA', amdsmi.AmdSmiStatus.MORE_DATA, PASS), + ('NO_DATA', amdsmi.AmdSmiStatus.NO_DATA, PASS), + ('INSUFFICIENT_SIZE', amdsmi.AmdSmiStatus.INSUFFICIENT_SIZE, PASS), + ('UNEXPECTED_SIZE', amdsmi.AmdSmiStatus.UNEXPECTED_SIZE, PASS), + ('UNEXPECTED_DATA', amdsmi.AmdSmiStatus.UNEXPECTED_DATA, PASS), + ('NON_AMD_CPU', amdsmi.AmdSmiStatus.NON_AMD_CPU, PASS), + 
('NO_ENERGY_DRV', amdsmi.AmdSmiStatus.NO_ENERGY_DRV, PASS), + ('NO_MSR_DRV', amdsmi.AmdSmiStatus.NO_MSR_DRV, PASS), + ('NO_HSMP_DRV', amdsmi.AmdSmiStatus.NO_HSMP_DRV, PASS), + ('NO_HSMP_SUP', amdsmi.AmdSmiStatus.NO_HSMP_SUP, PASS), + ('NO_HSMP_MSG_SUP', amdsmi.AmdSmiStatus.NO_HSMP_MSG_SUP, PASS), + ('HSMP_TIMEOUT', amdsmi.AmdSmiStatus.HSMP_TIMEOUT, PASS), + ('NO_DRV', amdsmi.AmdSmiStatus.NO_DRV, PASS), + ('FILE_NOT_FOUND', amdsmi.AmdSmiStatus.FILE_NOT_FOUND, PASS), + ('ARG_PTR_NULL', amdsmi.AmdSmiStatus.ARG_PTR_NULL, PASS), + ('AMDGPU_RESTART_ERR', amdsmi.AmdSmiStatus.AMDGPU_RESTART_ERR, PASS), + ('SETTING_UNAVAILABLE', amdsmi.AmdSmiStatus.SETTING_UNAVAILABLE, PASS), + ('CORRUPTED_EEPROM', amdsmi.AmdSmiStatus.CORRUPTED_EEPROM, PASS), + ('MAP_ERROR', amdsmi.AmdSmiStatus.MAP_ERROR, PASS), + ('UNKNOWN_ERROR', amdsmi.AmdSmiStatus.UNKNOWN_ERROR, PASS) + ] + clk_types = \ [ ('SYS', amdsmi.AmdSmiClkType.SYS, PASS), ('GFX', amdsmi.AmdSmiClkType.GFX, PASS), ('DF', amdsmi.AmdSmiClkType.DF, PASS), - ('DCEF', amdsmi.AmdSmiClkType.DCEF, PASS), + ('DCEF', amdsmi.AmdSmiClkType.DCEF, [PASS, FAIL]), ('SOC', amdsmi.AmdSmiClkType.SOC, PASS), ('MEM', amdsmi.AmdSmiClkType.MEM, PASS), - ('PCIE', amdsmi.AmdSmiClkType.PCIE, PASS), + ('PCIE', amdsmi.AmdSmiClkType.PCIE, [PASS, FAIL]), ('VCLK0', amdsmi.AmdSmiClkType.VCLK0, PASS), ('VCLK1', amdsmi.AmdSmiClkType.VCLK1, PASS), ('DCLK0', amdsmi.AmdSmiClkType.DCLK0, PASS), @@ -234,9 +347,9 @@ class TestAmdSmiPythonBDF(unittest.TestCase): io_bw_encodings = \ [ - ('AGG_BW0', amdsmi.amdsmi_wrapper.AGG_BW0, PASS), - ('RD_BW0', amdsmi.amdsmi_wrapper.RD_BW0, PASS), - ('WR_BW0', amdsmi.amdsmi_wrapper.WR_BW0, PASS) + ('AGG_BW0', amdsmi.amdsmi_interface.amdsmi_wrapper.AGG_BW0, PASS), + ('RD_BW0', amdsmi.amdsmi_interface.amdsmi_wrapper.RD_BW0, PASS), + ('WR_BW0', amdsmi.amdsmi_interface.amdsmi_wrapper.WR_BW0, PASS) ] event_groups = \ @@ -278,6 +391,17 @@ class TestAmdSmiPythonBDF(unittest.TestCase): ('GTT', amdsmi.AmdSmiMemoryType.GTT, PASS) ] + 
processor_types = \ + [ + ('UNKNOWN', amdsmi.AmdSmiProcessorType.UNKNOWN, FAIL), + ('AMD_GPU', amdsmi.AmdSmiProcessorType.AMD_GPU, PASS), + ('AMD_CPU', amdsmi.AmdSmiProcessorType.AMD_CPU, PASS), + ('NON_AMD_GPU', amdsmi.AmdSmiProcessorType.NON_AMD_GPU, PASS), + ('NON_AMD_CPU', amdsmi.AmdSmiProcessorType.NON_AMD_CPU, PASS), + ('AMD_CPU_CORE', amdsmi.AmdSmiProcessorType.AMD_CPU_CORE, PASS), + ('AMD_APU', amdsmi.AmdSmiProcessorType.AMD_APU, PASS) + ] + reg_types = \ [ ('XGMI', amdsmi.AmdSmiRegType.XGMI, PASS), @@ -429,3674 +553,2867 @@ class TestAmdSmiPythonBDF(unittest.TestCase): ('COMPUTE_MASK', amdsmi.AmdSmiPowerProfilePresetMasks.COMPUTE_MASK, PASS), ('VR_MASK', amdsmi.AmdSmiPowerProfilePresetMasks.VR_MASK, PASS), ('THREE_D_FULL_SCR_MASK', amdsmi.AmdSmiPowerProfilePresetMasks.THREE_D_FULL_SCR_MASK, PASS), - ('BOOTUP_DEFAULT', amdsmi.AmdSmiPowerProfilePresetMasks.BOOTUP_DEFAULT, PASS), - ('INVALID', amdsmi.AmdSmiPowerProfilePresetMasks.INVALID, FAIL) + ('BOOTUP_DEFAULT', amdsmi.AmdSmiPowerProfilePresetMasks.BOOTUP_DEFAULT, PASS) ] - def test_parse_bdf(self): - # go through all bdfs - expectations = self.valid_bdfs.copy() - expectations.update(self.invalid_bdfs) - for bdf in expectations: - expected = expectations[bdf] - result = amdsmi.amdsmi_interface._parse_bdf(bdf) - self.assertEqual(result, expected, - "Expected {} for bdf {}, but got {}".format( - expected, bdf, result)) - - @classmethod - def _convert_bdf_to_long(clz, bdf): - if len(bdf) == 12: - return bdf - if len(bdf) == 7: - return "0000:" + bdf - return None - - def test_format_bdf(self): - # go through valid bdfs - expectations = self.valid_bdfs.copy() - for bdf_string in expectations: - # use key as result and value as input - bdf_list = expectations[bdf_string] - smi_bdf = amdsmi.amdsmi_interface._make_amdsmi_bdf_from_list(bdf_list) - expected = TestAmdSmiPythonBDF._convert_bdf_to_long(bdf_string) - expected = expected.lower() - result = amdsmi.amdsmi_interface._format_bdf(smi_bdf) - 
self.assertEqual(result, expected, - "Expected {} for bdf {}, but got {}".format( - expected, bdf_string, result)) - - def test_check_res(self): - # expect retry error to raise SmiRetryException - with self.assertRaises(amdsmi.AmdSmiRetryException) as retry_test: - amdsmi.amdsmi_interface._check_res( - (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_RETRY)()) - # except retry error to have AMDSMI_STATUS_RETRY error code - self.assertEqual(retry_test.exception.get_error_code(), - amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_RETRY) - # expect invalid args error to raise AmdSmiLibraryException - with self.assertRaises(amdsmi.AmdSmiLibraryException) as inval_test: - amdsmi.amdsmi_interface._check_res( - (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL)()) - # expect invalid args error to have AMDSMI_STATUS_INVAL error code - self.assertEqual(inval_test.exception.get_error_code(), - amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL) - # for successfull call, expect no error is given - result = amdsmi.amdsmi_interface._check_res( - (lambda: amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_SUCCESS)()) - self.assertEqual(None, result) - - @classmethod def _print(self, msg, data=None): if verbose == 2: - if not data: + if data is None: print(msg, flush=True) - elif data in not_supported_error_code_names: - print(f'{msg}{data}', flush=True) + elif any(data in value for value in not_supported_error_codes): + print(f'{msg} {data}', flush=True) else: if isinstance(data, str) and data in error_map.values(): print(msg, end='') else: print(msg) - print(json.dumps(data, sort_keys=False, indent=4), flush=True) + if isinstance(data, str) or isinstance(data, int): + print(data) + else: + print(json.dumps(data, sort_keys=False, indent=4), flush=True) return - @classmethod def _print_func_name(self, msg): if verbose == 2: stk = inspect.stack() - if stk[2].function == '_callSetUp': + if stk[1].function == '_callSetUp': return 
print(msg, flush=True) - print(f'{stk[2].function}', flush=True) + print(f'## {stk[1].function}()', flush=True) return - @classmethod def get_error_code(self, e): error_code = e.get_error_code() return error_map[error_code] - @classmethod - def _check_ret(self, msg, _e, expected_code=None): - error_code = str(_e.get_error_code()) - error_code_name = error_map[error_code] - if error_code in not_supported_error_codes: - if verbose == 2: + def _check_ret(self, msg, _e, expected_code=None, printit=True): + error_code_int = int(_e.get_error_code()) + error_code = str(error_code_int) + if error_code in error_map: + error_code_name = error_map[error_code] + else: + error_code_name = 'UNKNOWN_ERROR' + + # Check for when there are multiple passing conditions + if isinstance(expected_code, list): + for ec in expected_code: + rc = self._check_ret(msg, _e, ec, False) # Do not print msg, otherwise multiple msgs printed + if not rc: + rc = self._check_ret(msg, _e, ec) # Call check again so msg is printed + return rc + + # No expected results found + print(f'{msg}\nTest FAILED with expected results {expected_code} but received {error_code_name}', flush=True) + return True + + # Check for single passing condition + if any(error_code in value for value in not_supported_error_codes): + if verbose == 2 and printit: print(f'{msg}\nTest SKIPPED with result {error_code_name}', flush=True) elif error_code_name == expected_code: - if verbose == 2: + if verbose == 2 and printit: print(f'{msg}\nTest PASSED with expected result {expected_code}', flush=True) else: - if verbose == 2: + if verbose == 2 and printit: print(f'{msg}\nTest FAILED with expected result {expected_code} but received {error_code_name}', flush=True) return True return False def setUp(self): - self._print_func_name('') + # Called before each test by unittest framework + self.raise_exception = None amdsmi.amdsmi_init() + self.processors = amdsmi.amdsmi_get_processor_handles() + self.assertGreaterEqual(len(self.processors), 
1) + self.assertLessEqual(len(self.processors), self.max_num_physical_devices) + return def tearDown(self): + # Called after each test by unittest framework amdsmi.amdsmi_shut_down() - + return def test_clean_gpu_local_data(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_clean_gpu_local_data(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_clean_gpu_local_data(gpu) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_cpu_apb_disable(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + pstate = 0 + for i, gpu in enumerate(self.processors): + msg = f'### amdsmi_cpu_apb_disable(gpu={i}, pstate={pstate}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_cpu_apb_disable(processors[i], 0) - self._print(msg, ret) + amdsmi.amdsmi_cpu_apb_disable(gpu, pstate) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_cpu_apb_enable(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, 
len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_cpu_apb_enable(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_cpu_apb_enable(gpu) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_first_online_core_on_cpu_socket(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_first_online_core_on_cpu_socket as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_first_online_core_on_cpu_socket as it fails (IO Error).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_first_online_core_on_cpu_socket(processors[i]) + ret = amdsmi.amdsmi_first_online_core_on_cpu_socket(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_clk_freq(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_clock_info as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_clk_freq as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for clk_type_name, clk_type, 
clk_cond in self.clk_types: + msg = f'gpu({i}): Clock Type({clk_type_name}):' try: - msg = f"gpu({i}): Clock Type({clk_type_name}): " - ret = amdsmi.amdsmi_get_clk_freq(processors[i], clk_type) + ret = amdsmi.amdsmi_get_clk_freq(gpu, clk_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, clk_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_clock_info(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_clock_info as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_clock_info as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for clk_type_name, clk_type, clk_cond in self.clk_types: + msg = f'### test amdsmi_get_clock_info(gpu={i}, Clock Type={clk_type_name})' try: - msg = f'gpu({i}): Clock Type({clk_type_name}): ' - ret = amdsmi.amdsmi_get_clock_info(processors[i], clk_type) + ret = amdsmi.amdsmi_get_clock_info(gpu, clk_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, clk_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_cclk_limit(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_cclk_limit(processors[i]) 
+ ret = amdsmi.amdsmi_get_cpu_cclk_limit(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_cpu_core_boostlimit(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_core_boostlimit(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_core_current_freq_limit(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_get_cpu_core_current_freq_limit(processors[i]) + ret = amdsmi.amdsmi_get_cpu_core_current_freq_limit(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_core_energy(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_core_energy as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping 
test_get_cpu_core_energy as it fails (IO Error).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_get_cpu_core_energy(processors[i]) + ret = amdsmi.amdsmi_get_cpu_core_energy(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_current_io_bandwidth(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for encoding_name, encoding, encoding_cond in self.io_bw_encodings: + msg = f'gpu({i}): encodeing({encoding_name}):' try: - msg = f'gpu({i}): encodeing({encoding_name}): ' - ret = amdsmi.amdsmi_get_cpu_current_io_bandwidth(processors[i], encoding, encoding_name) + ret = amdsmi.amdsmi_get_cpu_current_io_bandwidth(gpu, encoding, encoding_name) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, encoding_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_ddr_bw(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_ddr_bw(processors[i]) + ret = amdsmi.amdsmi_get_cpu_ddr_bw(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, 
e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_dimm_power_consumption(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_dimm_power_consumption as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO dimm_addr = 0 + self._print_func_name('') + # TODO Find better way to get dimm_addr dimm_addr = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_dimm_power_consumption(processors[i], dimm_addr) + ret = amdsmi.amdsmi_get_cpu_dimm_power_consumption(gpu, dimm_addr) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_dimm_temp_range_and_refresh_rate(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: self.skipTest("Skipping test_get_cpu_dimm_temp_range_and_refresh_rate as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO dimm_addr = 0 + # TODO Find better way to get dimm_addr dimm_addr = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(processors[i], dimm_addr) + ret = amdsmi.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(gpu, dimm_addr) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - 
raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_dimm_thermal_sensor(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: self.skipTest("Skipping test_get_cpu_dimm_thermal_sensor as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO dimm_addr = 0 + # TODO Find better way to get dimm_addr dimm_addr = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_dimm_thermal_sensor(processors[i], dimm_addr) + ret = amdsmi.amdsmi_get_cpu_dimm_thermal_sensor(gpu, dimm_addr) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_family(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_family as it fails.") - raise_exception = None - + self.skipTest("Skipping test_get_cpu_family as it fails (IO Error).") + msg = '' try: - msg = '' ret = amdsmi.amdsmi_get_cpu_family() self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_fclk_mclk(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_fclk_mclk(processors[i]) + ret = amdsmi.amdsmi_get_cpu_fclk_mclk(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_handles(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_cpu_handles as it is not implemented yet.") - raise_exception = None - + self._print_func_name('') + msg = '' try: - msg = '' - ret = amdsmi.amdsmi_get_cpu_handles(amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES) + ret = amdsmi.amdsmi_get_cpu_handles() self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_hsmp_driver_version(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_hsmp_driver_version as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_cpu_hsmp_driver_version as it fails (IO Error).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_hsmp_driver_version(processors[i]) + ret = amdsmi.amdsmi_get_cpu_hsmp_driver_version(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def 
test_get_cpu_hsmp_proto_ver(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_hsmp_proto_ver as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_cpu_hsmp_proto_ver as it fails (IO Error).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_hsmp_proto_ver(processors[i]) + ret = amdsmi.amdsmi_get_cpu_hsmp_proto_ver(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_model(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_model as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) + self.skipTest("Skipping test_get_cpu_model as it fails (IO Error).") + msg = '' try: - msg = '' ret = amdsmi.amdsmi_get_cpu_model() self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_prochot_status(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = 
f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_prochot_status(processors[i]) + ret = amdsmi.amdsmi_get_cpu_prochot_status(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_pwr_svi_telemetry_all_rails(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_pwr_svi_telemetry_all_rails(processors[i]) + ret = amdsmi.amdsmi_get_cpu_pwr_svi_telemetry_all_rails(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_smu_fw_version(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_smu_fw_version(processors[i]) + ret = amdsmi.amdsmi_get_cpu_smu_fw_version(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_c0_residency(self): - 
raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_c0_residency(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_c0_residency(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_current_active_freq_limit(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_current_active_freq_limit(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_current_active_freq_limit(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_energy(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_cpu_socket_energy as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_cpu_socket_energy as it fails (IO Error).") + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_energy(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_energy(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_freq_range(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_freq_range(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_freq_range(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_lclk_dpm_level(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) + self._print_func_name('') nbio_id = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): nbio_id({nbio_id}):' try: - msg = f'gpu({i}): nbio_id({nbio_id}): ' - ret = amdsmi.amdsmi_get_cpu_socket_lclk_dpm_level(processors[i], nbio_id) + ret = amdsmi.amdsmi_get_cpu_socket_lclk_dpm_level(gpu, nbio_id) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if 
self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_power(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_power(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_power(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_cpu_socket_power_cap(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_power_cap(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_power_cap_max(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_power_cap_max(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_power_cap_max(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + 
self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_cpu_socket_temperature(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_cpu_socket_temperature(processors[i]) + ret = amdsmi.amdsmi_get_cpu_socket_temperature(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_energy_count(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_energy_count as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_energy_count(processors[i]) + ret = amdsmi.amdsmi_get_energy_count(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_esmi_err_msg(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_esmi_err_msg as it is not implemented yet.") - raise_exception = None - - for status_num in error_map: + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping 
test_get_esmi_err_msg as it fails (Unknown Error).") + for status_type_name, status_type, status_cond in self.status_types: + msg = f'status(AMDSMI_STATUS_{status_type_name}):' try: - msg = f'status({error_map[status_num]}): ' - ret = amdsmi.amdsmi_get_esmi_err_msg(error_map[status_num]) + ret = amdsmi.amdsmi_get_esmi_err_msg(status_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, status_cond): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_fw_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f"gpu({i}): " - ret = amdsmi.amdsmi_get_fw_info(processors[i]) + ret = amdsmi.amdsmi_get_fw_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_accelerator_partition_profile(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_accelerator_partition_profile(processors[i]) + ret = amdsmi.amdsmi_get_gpu_accelerator_partition_profile(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - 
raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_accelerator_partition_profile_config(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_accelerator_partition_profile_config(processors[i]) + ret = amdsmi.amdsmi_get_gpu_accelerator_partition_profile_config(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_activity(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_activity as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_activity(processors[i]) + ret = amdsmi.amdsmi_get_gpu_activity(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_asic_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - 
self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'### test amdsmi_get_gpu_asic_info(gpu={i})' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_asic_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_asic_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_bad_page_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_bad_page_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_bad_page_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_bad_page_threshold(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_bad_page_threshold(processors[i]) + ret = amdsmi.amdsmi_get_gpu_bad_page_threshold(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise 
raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_bdf_id(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_bdf_id(processors[i]) + ret = amdsmi.amdsmi_get_gpu_bdf_id(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_board_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'### test amdsmi_get_gpu_board_info(gpu={i})' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_board_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_board_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_cache_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = gpu_cache_infos = 
amdsmi.amdsmi_get_gpu_cache_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_cache_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_compute_partition(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_compute_partition(processors[i]) + ret = amdsmi.amdsmi_get_gpu_compute_partition(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_compute_process_gpus(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_compute_process_gpus as it fails.") - raise_exception = None - - # TODO pid = 0 + self._print_func_name('') + if self.TODO_SKIP_NOT_COMPLETE: + self.skipTest("Skipping test_get_gpu_compute_process_gpus as it is not complete (Inval Error).") + # TODO Find better way to get pid pid = 0 + msg = f'pid({pid}):' try: - msg = f'pid({pid}): ' - ret = gpu_compute_process_gpuss = amdsmi.amdsmi_get_gpu_compute_process_gpus(pid) + ret = amdsmi.amdsmi_get_gpu_compute_process_gpus(pid) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise 
self.raise_exception + return def test_get_gpu_compute_process_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_compute_process_info() - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self._print_func_name('') + msg = '' + try: + ret = amdsmi.amdsmi_get_gpu_compute_process_info() + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_compute_process_info_by_pid(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_compute_process_info_by_pid as it fails.") - raise_exception = None - - # TODO pid = 0 + self._print_func_name('') + if self.TODO_SKIP_NOT_COMPLETE: + self.skipTest("Skipping test_get_gpu_compute_process_info_by_pid as it not complete (Device not found).") + # TODO Find better way to get pid pid = 0 + msg = f'pid({pid}):' try: - msg = f'pid({pid}): ' ret = amdsmi.amdsmi_get_gpu_compute_process_info_by_pid(pid) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_device_bdf(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_device_bdf(processors[i]) + ret = amdsmi.amdsmi_get_gpu_device_bdf(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_device_uuid(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_device_uuid(processors[i]) + ret = amdsmi.amdsmi_get_gpu_device_uuid(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_driver_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_driver_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_driver_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_ecc_count(self): - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for gpu_block_name, gpu_block, gpu_block_cond in self.gpu_blocks: + msg = f'gpu({i}): gpu_block({gpu_block_name})' try: - msg = f'gpu({i}): gpu_block({gpu_block_name}) ' - ret = amdsmi.amdsmi_get_gpu_ecc_count(processors[i], gpu_block) + ret = amdsmi.amdsmi_get_gpu_ecc_count(gpu, gpu_block) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, gpu_block_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_ecc_enabled(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_ecc_enabled(processors[i]) + ret = amdsmi.amdsmi_get_gpu_ecc_enabled(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_ecc_status(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: self.skipTest("Skipping test_get_gpu_ecc_status as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): for gpu_block_name, gpu_block, gpu_block_cond in 
self.gpu_blocks: + msg = f'gpu({i}): gpu_block({gpu_block_name})' try: - msg = f'gpu({i}): gpu_block({gpu_block_name}) ' - ret = amdsmi.amdsmi_get_gpu_ecc_status(processors[i], gpu_block) + ret = amdsmi.amdsmi_get_gpu_ecc_status(gpu, gpu_block) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, gpu_block_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_enumeration_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_enumeration_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_enumeration_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_gpu_event_notification(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_gpu_event_notification as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_event_notification(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_fan_rpms(self): - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_fan_rpms(processors[i], 0) + ret = amdsmi.amdsmi_get_gpu_fan_rpms(gpu, 0) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_gpu_fan_speed(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_fan_speed(processors[i], 0) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_gpu_fan_speed_max(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_fan_speed_max(processors[i], 0) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_id(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_id(processors[i]) + ret = amdsmi.amdsmi_get_gpu_id(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_kfd_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_kfd_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_kfd_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_mem_overdrive_level(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_mem_overdrive_level(processors[i]) + ret = amdsmi.amdsmi_get_gpu_mem_overdrive_level(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_memory_partition(self): - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_memory_partition(processors[i]) + ret = amdsmi.amdsmi_get_gpu_memory_partition(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_memory_partition_config(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_memory_partition_config as it fails on MI300.") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_memory_partition_config(processors[i]) + ret = amdsmi.amdsmi_get_gpu_memory_partition_config(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_memory_reserved_pages(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_memory_reserved_pages(processors[i]) + ret 
= amdsmi.amdsmi_get_gpu_memory_reserved_pages(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_memory_total(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for memory_type_name, memory_type, memory_type_cond in self.memory_types: + msg = f'gpu({i}): memory_type({memory_type_name})' try: - msg = f'gpu({i}): memory_type({memory_type_name}) ' - ret = amdsmi.amdsmi_get_gpu_memory_total(processors[i], memory_type) + ret = amdsmi.amdsmi_get_gpu_memory_total(gpu, memory_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, memory_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_memory_usage(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for memory_type_name, memory_type, memory_type_cond in self.memory_types: + msg = f'gpu({i}): memory_type({memory_type_name})' try: - msg = f'gpu({i}): memory_type({memory_type_name}) ' - ret = amdsmi.amdsmi_get_gpu_memory_usage(processors[i], memory_type) + ret = amdsmi.amdsmi_get_gpu_memory_usage(gpu, memory_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, memory_type_cond): - raise_exception = e - 
if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_metrics_header_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_metrics_header_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_metrics_header_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_metrics_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_metrics_info as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_metrics_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_metrics_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_od_volt_curve_regions(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) + 
self._print_func_name('') num_region = 10 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): num_region({num_region}):' try: - msg = f'gpu({i}): num_region({num_region}): ' - ret = amdsmi.amdsmi_get_gpu_od_volt_curve_regions(processors[i], num_region) + ret = amdsmi.amdsmi_get_gpu_od_volt_curve_regions(gpu, num_region) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_od_volt_info(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_od_volt_info as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_od_volt_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_od_volt_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_overdrive_level(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_overdrive_level(processors[i]) + ret = amdsmi.amdsmi_get_gpu_overdrive_level(gpu) self._print(msg, ret) except 
amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_pci_bandwidth(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_pci_bandwidth as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_pci_bandwidth(processors[i]) + ret = amdsmi.amdsmi_get_gpu_pci_bandwidth(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_pci_replay_counter(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + # TODO Check test_get_gpu_pci_replay_counter + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_pci_replay_counter(processors[i]) + ret = amdsmi.amdsmi_get_gpu_pci_replay_counter(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_pci_throughput(self): - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_pci_throughput(processors[i]) + ret = amdsmi.amdsmi_get_gpu_pci_throughput(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_perf_level(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_perf_level(processors[i]) + ret = amdsmi.amdsmi_get_gpu_perf_level(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_pm_metrics_info(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_pm_metrics_info as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_pm_metrics_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_pm_metrics_info(gpu) self._print(msg, ret) 
except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_power_profile_presets(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_power_profile_presets(processors[i], 0) + ret = amdsmi.amdsmi_get_gpu_power_profile_presets(gpu, 0) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_process_isolation(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_gpu_process_isolation as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_process_isolation(processors[i]) + ret = amdsmi.amdsmi_get_gpu_process_isolation(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_process_list(self): - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_process_list(processors[i]) + ret = amdsmi.amdsmi_get_gpu_process_list(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_ras_block_features_enabled(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_ras_block_features_enabled(processors[i]) + ret = amdsmi.amdsmi_get_gpu_ras_block_features_enabled(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_ras_feature_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_ras_feature_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_ras_feature_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if 
self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_reg_table_info(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_reg_table_info as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_gpu_reg_table_info as it fails on MI300.") + for i, gpu in enumerate(self.processors): for reg_type_name, reg_type, reg_type_cond in self.reg_types: + msg = f'gpu({i}): reg_type({reg_type_name}):' try: - msg = f'gpu({i}): reg_type({reg_type_name}): ' - ret = amdsmi.amdsmi_get_gpu_reg_table_info(processors[i], reg_type) + ret = amdsmi.amdsmi_get_gpu_reg_table_info(gpu, reg_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, reg_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_revision(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_gpu_revision as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_revision(processors[i]) + ret = amdsmi.amdsmi_get_gpu_revision(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + 
self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_subsystem_id(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_subsystem_id(processors[i]) + ret = amdsmi.amdsmi_get_gpu_subsystem_id(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_subsystem_name(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_subsystem_name(processors[i]) + ret = amdsmi.amdsmi_get_gpu_subsystem_name(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_topo_numa_affinity(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = 
amdsmi.amdsmi_get_gpu_topo_numa_affinity(processors[i]) + ret = amdsmi.amdsmi_get_gpu_topo_numa_affinity(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_total_ecc_count(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_total_ecc_count(processors[i]) + ret = amdsmi.amdsmi_get_gpu_total_ecc_count(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_vbios_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_vbios_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_vbios_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_vendor_name(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - 
self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_vendor_name(processors[i]) + ret = amdsmi.amdsmi_get_gpu_vendor_name(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_virtualization_mode(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_virtualization_mode(processors[i]) + ret = amdsmi.amdsmi_get_gpu_virtualization_mode(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_volt_metric(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for voltage_type_name, voltage_type, voltage_type_cond in self.voltage_types: for voltage_metric_name, voltage_metric, voltage_metric_cond in self.voltage_metrics: + msg = f'gpu({i}): voltage_type({voltage_type_name}) voltage_metric({voltage_metric_name}):' try: - msg = f'gpu({i}): 
voltage_type({voltage_type_name}) voltage_metric({voltage_metric_name}): ' - ret = amdsmi.amdsmi_get_gpu_volt_metric(processors[i], voltage_type, voltage_metric) + ret = amdsmi.amdsmi_get_gpu_volt_metric(gpu, voltage_type, voltage_metric) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if not voltage_type_cond == self.PASS: if self._check_ret(msg, e, voltage_type_cond): - raise_exception = e + self.raise_exception = e elif not voltage_metric_cond == self.PASS: if self._check_ret(msg, e, voltage_metric_cond): - raise_exception = e + self.raise_exception = e else: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_vram_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_vram_info(processors[i]) + ret = amdsmi.amdsmi_get_gpu_vram_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_vram_usage(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_vram_usage(processors[i]) + ret = amdsmi.amdsmi_get_gpu_vram_usage(gpu) 
self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_vram_vendor(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_vram_vendor(processors[i]) + ret = amdsmi.amdsmi_get_gpu_vram_vendor(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_xcd_counter(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_xcd_counter as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_xcd_counter(processors[i]) + ret = amdsmi.amdsmi_get_gpu_xcd_counter(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_xgmi_link_status(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - 
self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_gpu_xgmi_link_status as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_gpu_xgmi_link_status(processors[i]) + ret = amdsmi.amdsmi_get_gpu_xgmi_link_status(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_hsmp_metrics_table(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_hsmp_metrics_table(processors[i]) + ret = amdsmi.amdsmi_get_hsmp_metrics_table(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_hsmp_metrics_table_version(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_hsmp_metrics_table_version(processors[i]) + ret = 
amdsmi.amdsmi_get_hsmp_metrics_table_version(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_lib_version(self): - raise_exception = None - + self._print_func_name('') + msg = '' try: - msg = f'' ret = amdsmi.amdsmi_get_lib_version() self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_link_metrics(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_link_metrics as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_link_metrics as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_link_metrics(processors[i]) + ret = amdsmi.amdsmi_get_link_metrics(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_link_topology_nearest(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in 
enumerate(self.processors): for link_type_name, link_type, link_type_cond in self.link_types: + msg = f'gpu({i}): link_type({link_type_name})' try: - msg = f'gpu({i}): link_type({link_type_name}) ' - ret = amdsmi.amdsmi_get_link_topology_nearest(processors[i], link_type) + ret = amdsmi.amdsmi_get_link_topology_nearest(gpu, link_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, link_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_minmax_bandwidth_between_processors(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_minmax_bandwidth_between_processors as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for j in range(0, len(processors)): + self._print_func_name('') + for i, gpu_i in enumerate(self.processors): + for j, gpu_j in enumerate(self.processors): + msg = f'gpu({i},{j}):' try: - msg = f'gpu({i},{j}): ' - ret = amdsmi.amdsmi_get_minmax_bandwidth_between_processors(processors[i], processors[j]) + ret = amdsmi.amdsmi_get_minmax_bandwidth_between_processors(gpu_i, gpu_j) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if i == j: + if self._check_ret(msg, e, self.FAIL): + self.raise_exception = e + else: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_pcie_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, 
len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_pcie_info as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_pcie_info(processors[i]) + ret = amdsmi.amdsmi_get_pcie_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return - - - def test_get_power_cap_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + def test_set_cpu_pcie_link_rate(self): + self._print_func_name('') + if self.TODO_SKIP_NOT_COMPLETE: + self.skipTest("Skipping test_set_cpu_pcie_link_rate as it is not complete.") + # TODO rate_ctrl = 0 + rate_ctrl = 0 + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): rate_ctrl({rate_ctrl}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_power_cap_info(processors[i]) + ret = amdsmi.amdsmi_set_cpu_pcie_link_rate(gpu, rate_ctrl) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_power_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = power_info = 
amdsmi.amdsmi_get_power_info(processors[i]) + ret = amdsmi.amdsmi_get_power_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_processor_count_from_handles(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_processor_count_from_handles as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_processor_count_from_handles(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_processor_handle_from_bdf(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_processor_handle_from_bdf as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - #TODO: define bdf - bdf = 0 - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_processor_handle_from_bdf(bdf) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_processor_handles(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - self._print(f' {i:2d} processor_handles: {processors[i]}') - if 
raise_exception: - raise raise_exception - - - - def test_get_processor_handles_by_type(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_processor_handles_by_type as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_processor_handles_by_type(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_processor_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_processor_info(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_processor_type(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_processor_type(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_soc_pstate(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_soc_pstate as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - 
self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_soc_pstate(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_get_socket_handles(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_socket_handles as it fails.") - raise_exception = None - + self._print_func_name('') + msg = 'gpu():' try: - msg = f'' - ret = amdsmi.amdsmi_get_socket_handles() + ret = amdsmi.amdsmi_get_processor_count_from_handles(self.processors) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return - - - def test_get_socket_info(self): - raise_exception = None - - sockets = amdsmi.amdsmi_get_socket_handles() - self.assertGreaterEqual(len(sockets), 1) - # TODO Find maximum number of sockets - self.assertLessEqual(len(sockets), 32) - for i in range(0, len(sockets)): + def test_get_processor_handle_from_bdf(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_socket_info(sockets[i]) + bdf = amdsmi.amdsmi_get_gpu_device_bdf(gpu) + ret = amdsmi.amdsmi_get_processor_handle_from_bdf(bdf) + if gpu.value != ret.value: + msg += f'{msg}Expected: {gpu.value}, Received: {ret.value}' + self.raise_exception = amdsmi.AmdSmiLibraryException(amdsmi.amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL) + else: + self._print(msg) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + + def 
test_get_processor_handles(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + self._print(f' {i:2d} processor_handles: {gpu}') + if self.raise_exception: + raise self.raise_exception + return + + def test_get_processor_handles_by_type(self): + self._print_func_name('') + socket_ids = amdsmi.amdsmi_get_socket_handles() + for index, socket_id in enumerate(socket_ids): + for processor_name, processor_type, processor_cond in self.processor_types: + msg = f'socket({index}): processor_type({processor_name}):' + try: + ret = amdsmi.amdsmi_get_processor_handles_by_type(socket_id, processor_type) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, processor_cond): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + + def test_get_processor_info(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + try: + ret = amdsmi.amdsmi_get_processor_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + def test_get_processor_type(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + try: + ret = amdsmi.amdsmi_get_processor_type(gpu) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + def test_get_socket_handles(self): + self._print_func_name('') + msg = '' + try: + ret = amdsmi.amdsmi_get_socket_handles() + self._print(msg, [id(addr) for addr in ret]) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise 
self.raise_exception + return + + def test_get_socket_info(self): + self._print_func_name('') + sockets = amdsmi.amdsmi_get_socket_handles() + self.assertGreaterEqual(len(sockets), 1) + self.assertLessEqual(len(sockets), self.max_num_physical_devices) + for i, socket in enumerate(sockets): + msg = f'socket({i}):' + try: + ret = amdsmi.amdsmi_get_socket_info(socket) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_temp_metric(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_temp_metric as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for temperature_type_name, temperature_type, temperature_type_cond in self.temperature_types: for temperature_metric_name, temperature_metric, temperature_metric_cond in self.temperature_metrics: + msg = f'gpu({i}): temperature_type=({temperature_type_name}) temperature_metric({temperature_metric_name}):' try: - msg = f'gpu({i}): temperature_type=({temperature_type_name}) temperature_metric({temperature_metric_name}): ' - ret = amdsmi.amdsmi_get_temp_metric(processors[i], temperature_type, temperature_metric) + ret = amdsmi.amdsmi_get_temp_metric(gpu, temperature_type, temperature_metric) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if not temperature_type_cond == self.PASS: if self._check_ret(msg, e, temperature_type_cond): - raise_exception = e + self.raise_exception = e elif not temperature_metric_cond == self.PASS: if self._check_ret(msg, e, temperature_metric_cond): - raise_exception = e + self.raise_exception = e else: if self._check_ret(msg, e, 
self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_threads_per_core(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_threads_per_core as it fails.") - raise_exception = None - - # TODO threads_per_core + self.skipTest("Skipping test_get_threads_per_core as it fails (IO Error).") + # TODO threads_per_core + msg = 'threads_per_core:' try: - msg = f'threads_per_core: ' ret = amdsmi.amdsmi_get_threads_per_core() self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_utilization_count(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_utilization_count as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_get_utilization_count as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for utilization_counter_type_name, utilization_counter_type, utilization_counter_type_cond in self.utilization_counter_types: + msg = f'gpu({i}): utilization_counter_type({utilization_counter_type_name}):' try: - msg = f'gpu({i}): utilization_counter_type({utilization_counter_type_name}): ' - ret = amdsmi.amdsmi_get_utilization_count(processors[i], utilization_counter_type) + ret = amdsmi.amdsmi_get_utilization_count(gpu, [utilization_counter_type]) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, utilization_counter_type_cond): - raise_exception = e - if 
raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_violation_status(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_get_violation_status as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_violation_status(processors[i]) + ret = amdsmi.amdsmi_get_violation_status(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_xgmi_info(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_xgmi_info(processors[i]) + ret = amdsmi.amdsmi_get_xgmi_info(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + def test_gpu_counter(self): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_gpu_counter as it fails (Error opening file).") + for i, gpu in enumerate(self.processors): + for event_type_name, event_type, event_type_cond in 
self.event_types: + msg = f'gpu({i}): event_type({event_type_name}):' + # Create + msg1 = f'{msg} Create counter:' + try: + event_handle = amdsmi.amdsmi_gpu_create_counter(gpu, event_type) + self._print(msg1, event_handle) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, event_type_cond): + self.raise_exception = e + # if any exception occurs, skip the rest of the loop + continue - def test_get_xgmi_plpd(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_get_xgmi_plpd as it is not implemented yet.") - raise_exception = None + # Read + msg1 = f'{msg} Read counter:' + try: + amdsmi.amdsmi_gpu_read_counter(event_handle) + self._print(msg1) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, event_type_cond): + self.raise_exception = e - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_get_xgmi_plpd(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + # Control + for counter_command_name, counter_command, counter_commands_cond in self.counter_commands: + msg1 = f'{msg} event_type({event_type_name}): counter_command({counter_command_name}):' + try: + amdsmi.amdsmi_gpu_control_counter(event_handle, counter_command) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, counter_commands_cond): + self.raise_exception = e + # Destroy + msg1 = f'{msg} Destroy counter:' + try: + amdsmi.amdsmi_gpu_destroy_counter(event_handle) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, event_type_cond): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def 
test_gpu_counter_group_supported(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_gpu_counter_group_supported as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for event_group_name, event_group, event_group_cond in self.event_groups: + msg = f'gpu({i}): event_group({event_group_name}):' try: - msg = f'gpu({i}): event_group({event_group_name}): ' - ret = amdsmi.amdsmi_gpu_counter_group_supported(processors[i], event_group) - self._print(msg, ret) + amdsmi.amdsmi_gpu_counter_group_supported(gpu, event_group) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, event_group_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - - if False: # TODO: create_counter, destroy_counter, read_counter, get_gpu_available_counters, gpu_control_counter - ''' Check these: - test_get_gpu_pci_replay_counter - test_get_gpu_xcd_counter - test_gpu_counter_group_supported - ''' - - - def test_gpu_create_counter(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_gpu_create_counter as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for event_type_name, event_type, event_type_cond in self.event_types: - try: - msg = f'gpu({i}): event_type({event_type_name}): ' - ret = amdsmi.amdsmi_gpu_create_counter(processors[i], event_type) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, event_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_gpu_destroy_counter(self): - if self.TODO_SKIP_FAIL: - 
self.skipTest("Skipping test_gpu_destroy_counter as it fails.") - raise_exception = None - - # TODO event_handle = 0 - event_handle = 0 - try: - msg = f'event_handle({event_handle}): ' - ret = event_handle - amdsmi.amdsmi_gpu_destroy_counter(event_handle) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_gpu_read_counter(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_gpu_read_counter as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO event_handle = 0 - event_handle = 0 - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): event_handle({event_handle}): ' - ret = amdsmi.amdsmi_gpu_read_counter(event_handle) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_get_gpu_available_counters(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_get_gpu_available_counters as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): for event_group_name, event_group_type, event_group_cond in self.event_groups: + msg = f'gpu({i}): event_group({event_group_name})' try: - msg = f'gpu({i}): event_group({event_group_name}) ' - ret = amdsmi.amdsmi_get_gpu_available_counters(processors[i], event_group_type) + ret = amdsmi.amdsmi_get_gpu_available_counters(gpu, event_group_type) self._print(msg, ret) except 
amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, event_group_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_gpu_control_counter(self): - if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_gpu_control_counter as it fails.") - raise_exception = None - - # TODO event_handle = 0 - event_handle = 0 - for counter_command_name, counter_command, counter_commands_cond in self.counter_commands: - try: - msg = f'counter_command({counter_command_name}): ' - ret = amdsmi.amdsmi_gpu_control_counter(event_handle, counter_command) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_gpu_validate_ras_eeprom(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_gpu_validate_ras_eeprom as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_gpu_validate_ras_eepromas it fails (File Error).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_gpu_validate_ras_eeprom(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_gpu_validate_ras_eeprom(gpu) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_gpu_xgmi_error_status(self): - self.skipTest("Skipping test_gpu_xgmi_error_status currently not a valid test - temporarily disabled.") - # See 
information in xgmi_read_write.cc file, it also skips this test for all ASICs. - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_gpu_xgmi_error_status as it fails on MI300.") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_gpu_xgmi_error_status(processors[i]) + ret = amdsmi.amdsmi_gpu_xgmi_error_status(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_init(self): - raise_exception = None - + self._print_func_name('') + msg = '' try: - msg = f'' - ret = amdsmi.amdsmi_init() - self._print(msg, ret) + amdsmi.amdsmi_init() + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_shut_down(self): - raise_exception = None - + self._print_func_name('') + msg = '' try: - msg = f'' - ret = amdsmi.amdsmi_shut_down() - self._print(msg, ret) + amdsmi.amdsmi_shut_down() + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_init_gpu_event_notification(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_init_gpu_event_notification as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - 
self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_init_gpu_event_notification(processors[i]) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_is_P2P_accessible(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for j in range(0, len(processors)): + self._print_func_name('') + for i, gpu_i in enumerate(self.processors): + for j, gpu_j in enumerate(self.processors): + msg = f'gpu({i},{j}):' try: - msg = f'gpu({i},{j}): ' - ret = amdsmi.amdsmi_is_P2P_accessible(processors[i], processors[j]) + ret = amdsmi.amdsmi_is_P2P_accessible(gpu_i, gpu_j) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + def test_gpu_event(self): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_gpu_event as it fails (File Error).") + mask = 1 << (amdsmi.AmdSmiEvtNotificationType.GPU_PRE_RESET -1) | \ + 1 << (amdsmi.AmdSmiEvtNotificationType.GPU_POST_RESET -1) + timeout_ms = 1000 + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' - - def test_is_gpu_power_management_enabled(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, 
len(processors)): + # Init try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_is_gpu_power_management_enabled(processors[i]) + self._print(f'{msg} amdsmi_init_gpu_event_notification()') + amdsmi.amdsmi_init_gpu_event_notification(gpu) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + # Skip remaining tests on any exception when initializing + continue + + # Is Enabled + try: + self._print(f'{msg} amdsmi_is_gpu_power_management_enabled()') + ret = amdsmi.amdsmi_is_gpu_power_management_enabled(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + # Set Mask + try: + self._print(f'{msg} amdsmi_set_gpu_event_notification_mask()') + amdsmi.amdsmi_set_gpu_event_notification_mask(gpu, mask) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + # Get + try: + self._print(f'{msg} amdsmi_get_gpu_event_notification()') + ret = amdsmi.amdsmi_get_gpu_event_notification(timeout_ms) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + + # Stop + try: + self._print(f'{msg} amdsmi_stop_gpu_event_notification()') + amdsmi.amdsmi_stop_gpu_event_notification(gpu) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + + if self.raise_exception: + raise self.raise_exception + return def test_reset_gpu(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_reset_gpu as it fails (MI350X, Hang).") + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_reset_gpu(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_reset_gpu(gpu) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_reset_gpu_fan(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_reset_gpu_fan(processors[i], 0) - self._print(msg, ret) + amdsmi.amdsmi_reset_gpu_fan(gpu, 0) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_reset_gpu_xgmi_error(self): - self.skipTest("Skipping test_reset_gpu_xgmi_error currently not a valid test - temporarily disabled.") - # See information in xgmi_read_write.cc file, it also skips this test for all ASICs. 
- raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_reset_gpu_xgmi_error as it fails on MI300.") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_reset_gpu_xgmi_error(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_reset_gpu_xgmi_error(gpu) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_clk_freq(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_clk_freq as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO freq_bitmask = 0 - freq_bitmask = 0 - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_set_clk_freq as it fails (Perm failure).") + for i, gpu in enumerate(self.processors): for clk_type_name, clk_type, clk_cond in self.clk_types: + msg = f'gpu({i}): Get Clock Info({clk_type_name}):' try: - msg = f'gpu({i}): clk_type({clk_type_name}): freq_bitmask({freq_bitmask}): ' - ret = amdsmi.amdsmi_set_clk_freq(processors[i], clk_type, freq_bitmask) + ret = amdsmi.amdsmi_get_clk_freq(gpu, clk_type) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, clk_cond): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + continue + clk_freq_info = ret + current = clk_freq_info['current'] + num_supported = 
clk_freq_info['num_supported'] + frequency = clk_freq_info['frequency'] + if num_supported == 0: + self._print(f'No supported frequencies for clk_type={clk_type_name}') + continue + found_error = False + for index in range(0, num_supported): + msg = f'\tgpu({i}):' + try: + freq_bitmask = frequency[index] + msg = f'{msg} Set clk_type({clk_type_name}): freq_bitmask({freq_bitmask}):' + amdsmi.amdsmi_set_clk_freq(gpu, clk_type_name, freq_bitmask) + self._print(msg, '') + except amdsmi.AmdSmiLibraryException as e: + found_error = True + if self._check_ret(msg, e, clk_cond): + self.raise_exception = e + if not found_error: + amdsmi.amdsmi_set_clk_freq(gpu, clk_type_name, frequency[current]) + if self.raise_exception: + raise self.raise_exception + return - - - def test_set_cpu_core_boostlimit(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_cpu_core_boostlimit as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO boost_limit = 0 - boost_limit = 0 - for i in range(0, len(processors)): + def test_cpu_core_boostlimit(self): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + msg1 = f'{msg} amdsmi_get_cpu_core_boostlimit():' try: - msg = f'gpu({i}): boost_limit({boost_limit}): ' - ret = amdsmi.amdsmi_set_cpu_core_boostlimit(processors[i], boost_limit) - self._print(msg, ret) + boost_limit = amdsmi.amdsmi_get_cpu_core_boostlimit(gpu) + msg1 = f'{msg1} boost_limit={boost_limit}' + self._print(msg1, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + msg1 = f'{msg} amdsmi_set_cpu_core_boostlimit():' + try: + amdsmi.amdsmi_set_cpu_core_boostlimit(gpu, boost_limit) + self._print(msg1, '') + except 
amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_cpu_df_pstate_range(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_cpu_df_pstate_range as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO max_pstate = 0, min_pstate = 0 max_pstate = 0 min_pstate = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): max_pstate({max_pstate}) min_pstate({min_pstate}):' try: - msg = f'gpu({i}): max_pstate({max_pstate}) min_pstate({min_pstate}): ' - ret = amdsmi.amdsmi_set_cpu_df_pstate_range(processors[i], max_pstate, min_pstate) - self._print(msg, ret) + amdsmi.amdsmi_set_cpu_df_pstate_range(gpu, max_pstate, min_pstate) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_cpu_gmi3_link_width_range(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_cpu_gmi3_link_width_range as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO min_link_width = 0, max_link_width = 0 min_link_width = 0 max_link_width = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): min_link_width({min_link_width}) max_link_width({max_link_width}):' try: - msg = f'gpu({i}): min_link_width({min_link_width}) max_link_width({max_link_width}): ' - ret = 
amdsmi.amdsmi_set_cpu_gmi3_link_width_range(processors[i], min_link_width, max_link_width) - self._print(msg, ret) + amdsmi.amdsmi_set_cpu_gmi3_link_width_range(gpu, min_link_width, max_link_width) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_set_cpu_pcie_link_rate(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_cpu_pcie_link_rate as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO rate_ctrl = 0 - rate_ctrl = 0 - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): rate_ctrl({rate_ctrl}): ' - ret = amdsmi.amdsmi_set_cpu_pcie_link_rate(processors[i], rate_ctrl) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_cpu_pwr_efficiency_mode(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_set_cpu_pwr_efficiency_mode as it is not implemented is not yet implemented.") - raise_exception = None + self._print_func_name('') + modes = [0, 1, 2] + for i, gpu in enumerate(self.processors): + for mode in modes: + msg = f'gpu({i}): mode({mode}):' + try: + amdsmi.amdsmi_set_cpu_pwr_efficiency_mode(gpu, mode) + self._print(msg, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO mode = 0 - mode = 0 - for i in range(0, 
len(processors)): - try: - msg = f'gpu({i}): mode({mode}): ' - ret = amdsmi.set_cpu_pwr_efficiency_mode(processors[i], mode) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_set_cpu_socket_boostlimit(self): + def test_cpu_socket_boostlimit(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_cpu_socket_boostlimit as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) + self.skipTest("Skipping test_cpu_socket_boostlimit as it is not complete.") # TODO boost_limit = 0 boost_limit = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + msg1 = f'{msg} boost_limit({boost_limit}):' try: - msg = f'gpu({i}): boost_limit({boost_limit}): ' - ret = amdsmi.amdsmi_set_cpu_socket_boostlimit(processors[i], boost_limit) - self._print(msg, ret) + amdsmi.amdsmi_set_cpu_socket_boostlimit(gpu, boost_limit) + self._print(msg1, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_cpu_socket_lclk_dpm_level(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_cpu_socket_lclk_dpm_level as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO nbio_id = 0, min_val = 0, max_val = 0 nbio_id = 0 min_val = 0 max_val = 0 - for i in range(0, len(processors)): + for i, gpu in 
enumerate(self.processors): + msg = f'gpu({i}): nbio_id({nbio_id}) min_val({min_val}) max_val({max_val}):' try: - msg = f'gpu({i}): nbio_id({nbio_id}) min_val({min_val}) max_val({max_val}): ' - ret = amdsmi.amdsmi_set_cpu_socket_lclk_dpm_level(processors[i], nbio_id, min_val, max_val) - self._print(msg, ret) + amdsmi.amdsmi_set_cpu_socket_lclk_dpm_level(gpu, nbio_id, min_val, max_val) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return - - - def test_set_cpu_socket_power_cap(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_cpu_socket_power_cap as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO power_cap = 0 - power_cap = 0 - for i in range(0, len(processors)): + def test_cpu_socket_power_cap(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + msg1 = f'{msg} amdsmi_get_cpu_socket_power_cap():' try: - msg = f'gpu({i}): power_cap({power_cap}): ' - ret = amdsmi.amdsmi_set_cpu_socket_power_cap(processors[i], power_cap) - self._print(msg, ret) + power_cap = amdsmi.amdsmi_get_cpu_socket_power_cap(gpu) + self._print(msg1, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + msg1 = f'{msg} power_cap={power_cap}' + try: + amdsmi.amdsmi_set_cpu_socket_power_cap(gpu, power_cap) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise 
self.raise_exception + return def test_set_cpu_xgmi_width(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_cpu_xgmi_width as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO min_width = 0, max_width = 0 min_width = 0 max_width = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): min_width({min_width} max_width({max_width}): )' try: - msg = f'gpu({i}): min_width({min_width} max_width({max_width}): )' - ret = amdsmi.amdsmi_set_cpu_xgmi_width(processors[i], min_width , max_width) - self._print(msg, ret) + amdsmi.amdsmi_set_cpu_xgmi_width(gpu, min_width , max_width) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_accelerator_partition_profile(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_gpu_accelerator_partition_profile as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO profile_index = 0 profile_index = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): profile_index({profile_index}):' try: - msg = f'gpu({i}): profile_index({profile_index}): ' - ret = amdsmi.amdsmi_set_gpu_accelerator_partition_profile(processors[i], profile_index) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_accelerator_partition_profile(gpu, profile_index) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - 
raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_clk_limit(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_gpu_clk_limit as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO value = 0 is for min and max, need to separate out values + # TODO Find better way to set value value = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): for clk_type_name, clk_type, clk_cond in self.clk_types: for clk_limit_type_name, clk_limit_type, clk_limit_cond in self.clk_limit_types: + msg = f'gpu({i}): value({value}) clock_type=({clk_type_name}) clock_limit_type({clk_limit_type_name}):' try: - msg = f'gpu({i}): value({value}) clock_type=({clk_type_name}) clock_limit_type({clk_limit_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_clk_limit(processors[i], clk_type, clk_limit_type, value) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_clk_limit(gpu, clk_type_name, clk_limit_type_name, value) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if not clk_cond == self.PASS: self._check_ret(msg, e, clk_cond) - raise_exception = e + self.raise_exception = e elif not clk_limit_type == self.PASS: self._check_ret(msg, e, clk_limit_type) - raise_exception = e + self.raise_exception = e else: self._check_ret(msg, e, self.PASS) - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_clk_range(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_clk_range as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - 
self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO min_clk_value = 0 max_clk_value = 0 - min_clk_value = 0 - max_clk_value = 0 - for i in range(0, len(processors)): + self._print_func_name('') + # TODO Find better way to set min_clk_value, max_clk_value + min_clk_value = 100 + max_clk_value = 200 + for i, gpu in enumerate(self.processors): for clk_type_name, clk_type, clk_cond in self.clk_types: + msg = f'gpu({i}): min_clk_value({min_clk_value}) max_clk_value({max_clk_value}) clk_type({clk_type_name}):' try: - msg = f'gpu({i}): min_clk_value({min_clk_value}) max_clk_value({max_clk_value}) clk_type({clk_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_clk_range(processors[i], min_clk_value, max_clk_value, clk_type) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_clk_range(gpu, min_clk_value, max_clk_value, clk_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, clk_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_compute_partition(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_compute_partition as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_set_gpu_compute_partition as it fails on MI300.") + for i, gpu in enumerate(self.processors): + default_compute_partition_type = self.compute_partition_types[0][1] + msg = f'gpu({i}): amdsmi_get_gpu_compute_partition()' + try: + default_compute_partition_name = amdsmi.amdsmi_get_gpu_compute_partition(gpu) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + 
self.raise_exception = e + continue for compute_partition_type_name, compute_partition_type, compute_partition_type_cond in self.compute_partition_types: + if default_compute_partition_name == compute_partition_type_name: + default_compute_partition_type = compute_partition_type + msg = f'gpu({i}): compute_partition_type({compute_partition_type_name}):' try: - msg = f'gpu({i}): compute_partition_type({compute_partition_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_compute_partition(processors[i], compute_partition_type) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_compute_partition(gpu, compute_partition_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, compute_partition_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_set_gpu_event_notification_mask(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_set_gpu_event_notification_mask as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.raise_exception = e + msg = f'gpu({i}): amdsmi_set_gpu_compute_partition({default_compute_partition_name})' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_gpu_event_notification_mask(processors[i]) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_compute_partition(gpu, default_compute_partition_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + continue + if self.raise_exception: + raise self.raise_exception + return - - - def test_set_gpu_fan_speed(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_fan_speed as it is not complete.") - raise_exception = None - - processors = 
amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO fan_speed = 0 - fan_speed = 0 - for i in range(0, len(processors)): + def test_gpu_fan_speed(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + msg1 = f'{msg} amdsmi_get_gpu_fan_speed()' try: - msg = f'gpu({i}): fan_speed({fan_speed}): ' - ret = amdsmi.amdsmi_set_gpu_fan_speed(processors[i], 0, fan_speed) - self._print(msg, ret) + # Determine current fan speed + fan_speed_current = amdsmi.amdsmi_get_gpu_fan_speed(gpu, 0) + msg1 = f'{msg1} fan_speed={fan_speed_current}' + self._print(msg1, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + try: + # Determine max fan speed + msg1 = f'{msg} amdsmi_get_gpu_fan_speed_max()' + fan_speed_max = amdsmi.amdsmi_get_gpu_fan_speed_max(gpu, 0) + msg1 = f'{msg1} fan_speed_max={fan_speed_max}' + if fan_speed_current == fan_speed_max: + fan_speed = int(fan_speed_max/2) + else: + fan_speed = fan_speed_max + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + try: + # Set fan speed + msg1 = f'{msg} fan_speed({fan_speed}):' + amdsmi.amdsmi_set_gpu_fan_speed(gpu, 0, fan_speed) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + + try: + # Set to original fan speed + msg1 = f'{msg} fan_speed({fan_speed_current}):' + amdsmi.amdsmi_set_gpu_fan_speed(gpu, 0, fan_speed_current) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def 
test_set_gpu_memory_partition(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_memory_partition as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_set_gpu_memory_partition as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for memory_partition_type_name, memory_partition_type, memory_partition_type_cond in self.memory_partition_types: + msg = f'gpu({i}): memory_partition_type({memory_partition_type_name}):' try: - msg = f'gpu({i}): memory_partition_type({memory_partition_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_memory_partition(processors[i], memory_partition_type) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_memory_partition(gpu, memory_partition_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, memory_partition_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_memory_partition_mode(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_memory_partition_mode as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_set_gpu_memory_partition_mode as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): for memory_partition_type_name, memory_partition_type, memory_partition_type_cond in 
self.memory_partition_types: + msg = f'gpu({i}): memory_partition_type({memory_partition_type_name}):' try: - msg = f'gpu({i}): memory_partition_type({memory_partition_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_memory_partition_mode(processors[i], memory_partition_type) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_memory_partition_mode(gpu, memory_partition_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, memory_partition_type_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_od_clk_info(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_gpu_od_clk_info as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO value = 0 - value = 0 - for i in range(0, len(processors)): + value = 200 + for i, gpu in enumerate(self.processors): for freq_ind_name, freq_ind, freq_ind_cond in self.freq_inds: for clk_type_name, clk_type, clk_cond in self.clk_types: + msg = f'gpu({i}): freq_ind({freq_ind_name}) value({value}) clk_type({clk_type_name}):' try: - msg = f'gpu({i}): freq_ind({freq_ind_name}) value({value}) clk_type({clk_type_name}): ' - ret = amdsmi.amdsmi_set_gpu_od_clk_info(processors[i], freq_ind, value, clk_type) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_od_clk_info(gpu, freq_ind, value, clk_type) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if not freq_ind_cond == self.PASS: self._check_ret(msg, e, freq_ind_cond) - raise_exception = e + self.raise_exception = e elif not clk_cond == self.PASS: self._check_ret(msg, e, clk_cond) - raise_exception = e + self.raise_exception = e else: self._check_ret(msg, e, self.PASS) - raise_exception = e - if raise_exception: - raise 
raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_od_volt_info(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_gpu_od_volt_info as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO vpoint = 0 clk_value = 0 volt_value = 0 vpoint = 0 clk_value = 0 volt_value = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): vpoint({vpoint}) clk_value({clk_value}) volt_value({volt_value}):' try: - msg = f'gpu({i}): vpoint({vpoint}) clk_value({clk_value}) volt_value({volt_value}): ' - ret = amdsmi.amdsmi_set_gpu_od_volt_info(processors[i], vpoint, clk_value, volt_value) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_od_volt_info(gpu, vpoint, clk_value, volt_value) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_overdrive_level(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_overdrive_level as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO overdrive_value = 0 - overdrive_value = 0 - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): overdrive_value({overdrive_value}): ' - ret = amdsmi.amdsmi_set_gpu_overdrive_level(processors[i], overdrive_value) - self._print(msg, ret) + # Find current overdrive value + overdrive_value_current = 
amdsmi.amdsmi_get_gpu_overdrive_level(gpu) + if overdrive_value_current != 1: + overdrive_value = 1 + else: + overdrive_value = 2 + + # Set overdrive value + msg = f'gpu({i}): overdrive_value({overdrive_value}):' + amdsmi.amdsmi_set_gpu_overdrive_level(gpu, overdrive_value) + self._print(msg, '') + + # Set back to original overdrive value + amdsmi.amdsmi_set_gpu_overdrive_level(gpu, overdrive_value_current) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_pci_bandwidth(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_pci_bandwidth as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO bitmask = 0 - bitmask = 0 - for i in range(0, len(processors)): + self._print_func_name('') + if self.TODO_SKIP_FAIL: + self.skipTest("Skipping test_set_gpu_pci_bandwidth as it fails (MI350X, AMDSMI_STATUS_UNEXPECTED_DATA).") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): bitmask({bitmask}): ' - ret = amdsmi.amdsmi_set_gpu_pci_bandwidth(processors[i], bitmask) - self._print(msg, ret) + # Get current PCI bandwidth info + bandwidth_info = amdsmi.amdsmi_get_gpu_pci_bandwidth(gpu) + current_bandwidth_index = bandwidth_info['transfer_rate']['current'] + if current_bandwidth_index > 0: + bitmask = 1 << (current_bandwidth_index - 1) + else: + bitmask = 1 << (current_bandwidth_index) + + # Set PCI bandwidth + msg = f'gpu({i}): bitmask({bitmask}):' + amdsmi.amdsmi_set_gpu_pci_bandwidth(gpu, bitmask) + self._print(msg, '') + + # Set back to original PCI bandwidth + bitmask = 1 << (current_bandwidth_index) + amdsmi.amdsmi_set_gpu_pci_bandwidth(gpu, bitmask) except 
amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_perf_determinism_mode(self): + self._print_func_name('') if self.TODO_SKIP_NOT_COMPLETE: self.skipTest("Skipping test_set_gpu_perf_determinism_mode as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) # TODO clk_value = 0 clk_value = 0 - for i in range(0, len(processors)): + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}): clk_value({clk_value}):' try: - msg = f'gpu({i}): clk_value({clk_value}): ' - ret = amdsmi.amdsmi_set_gpu_perf_determinism_mode(processors[i], clk_value) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_perf_determinism_mode(gpu, clk_value) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_perf_level(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_perf_level as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO Need to set back to default - for i in range(0, len(processors)): + self._print_func_name('') + dev_perf_level_current = self.dev_perf_levels[0][1] + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + try: + dev_perf_level_name_current = amdsmi.amdsmi_get_gpu_perf_level(gpu) + items = dev_perf_level_name_current.split('_') + dev_perf_level_name_current = items[-1] + except amdsmi.AmdSmiLibraryException as e: + 
self._print(msg, e) + continue for dev_perf_level_name, dev_perf_level, dev_perf_level_cond in self.dev_perf_levels: + msg = f'gpu({i}):' try: - msg = f'gpu({i}): dev_perf_level({dev_perf_level_name}): ' - ret = amdsmi.amdsmi_set_gpu_perf_level(processors[i], dev_perf_level) - self._print(msg, ret) + if dev_perf_level_name_current == dev_perf_level_name: + dev_perf_level_current = dev_perf_level + + msg = f'{msg} dev_perf_level({dev_perf_level_name}):' + amdsmi.amdsmi_set_gpu_perf_level(gpu, dev_perf_level) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, dev_perf_level_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + try: + amdsmi.amdsmi_set_gpu_perf_level(gpu, dev_perf_level_current) + except amdsmi.AmdSmiLibraryException as e: + self._print(msg, e) + continue + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_power_profile(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_gpu_power_profile as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for power_profile_preset_mask, power_profile_preset_masks_cond in self.power_profile_preset_masks: + self._print_func_name('') + for i, gpu in enumerate(self.processors): + for power_profile_preset_mask_name, power_profile_preset_mask, power_profile_preset_masks_cond in self.power_profile_preset_masks: + msg = f'gpu({i}): power_profile_preset_mask({power_profile_preset_mask_name}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_gpu_power_profile(processors[i], 0, power_profile_preset_mask) - self._print(msg, ret) + amdsmi.amdsmi_set_gpu_power_profile(gpu, 0, power_profile_preset_mask) + self._print(msg, '') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, 
power_profile_preset_masks_cond): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_set_gpu_process_isolation(self): + self._print_func_name('') + pisolates = [1, 0] + for i, gpu in enumerate(self.processors): + for pisolate in pisolates: + msg = f'gpu({i}): pisolate({pisolate})' + try: + amdsmi.amdsmi_set_gpu_process_isolation(gpu, pisolate) + self._print(msg) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return + + def test_power_cap(self): + '''test power cap''' + self._print_func_name('') + for i, gpu in enumerate(self.processors): + # Get Power Cap Info + msg = f'### test amdsmi_get_power_cap_info(gpu={i})' + try: + power_cap_info = amdsmi.amdsmi_get_power_cap_info(gpu) + self._print(msg, power_cap_info) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + # Have to be able to get info before setting + continue + + # Set to Average Power Cap + try: + cap = int((power_cap_info['max_power_cap'] + power_cap_info['min_power_cap']) / 2) + msg = f'### test amdsmi_set_power_cap(gpu={i}, 0, cap={cap})' + amdsmi.amdsmi_set_power_cap(gpu, 0, cap) + self._print(msg, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + + # Restore Power Cap + try: + cap = power_cap_info['power_cap'] + msg = f'### test amdsmi_set_power_cap(gpu={i}, 0, cap={cap})' + amdsmi.amdsmi_set_power_cap(gpu, 0, cap) + self._print(msg, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + + if self.raise_exception: + raise self.raise_exception + return + + def test_soc_pstate(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = 
f'gpu({i}):' + # Get current policy info + msg1 = f'{msg} amdsmi_get_soc_pstate' + try: + policy_info = amdsmi.amdsmi_get_soc_pstate(gpu) + self._print(msg1, '') + + num_supported = policy_info['num_supported'] + if not isinstance(num_supported, int): + self._print('Cannot determine num_supported={num_supported}', '') + continue + policy_id_current = policy_info['current_id'] + if not isinstance(policy_id_current, int): + self._print('Cannot determine policy_id_current={policy_id_current}', '') + continue + policy_id_orig = policy_info['policies'][policy_id_current]['policy_id'] + if not isinstance(policy_id_orig, int): + self._print('Cannot determine orig policy_id={policy_id_orig}', '') + continue + + index = 0 + if num_supported >= 2: + if policy_id_current != 0: + index = 1 + policy_id = policy_info['policies'][index]['policy_id'] + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + + # Set SOC Pstate policy + msg1 = f'{msg} policy_id({policy_id}):' + try: + amdsmi.amdsmi_set_soc_pstate(gpu, policy_id) + self._print(msg1, '') + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue + + # Set back to original policy + msg1 = f'{msg} policy_id({policy_id_orig}):' + try: + amdsmi.amdsmi_set_soc_pstate(gpu, policy_id_orig) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + + if self.raise_exception: + raise self.raise_exception + return + + def test_xgmi_plpd(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_set_gpu_process_isolation as it is not yet implemented.") - raise_exception = None + self.skipTest("Skipping test_set_xgmi_plpd as it fails on MI300.") + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - 
self.assertLessEqual(len(processors), 32) - # TODO pisolate = 0 - pisolate = 0 - for i in range(0, len(processors)): + # Get current policy info + msg1 = f'{msg} amdsmi_get_xgmi_plpd()' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_gpu_process_isolation(processors[i], pisolate) - self._print(msg, ret) + policy_info = amdsmi.amdsmi_get_xgmi_plpd(gpu) + self._print(msg1, '') + + num_supported = policy_info['num_supported'] + if not isinstance(num_supported, int): + self._print('Cannot determine num_supported={num_supported}', '') + continue + policy_id_current = policy_info['current_id'] + if not isinstance(policy_id_current, int): + self._print('Cannot determine policy_id_current={policy_id_current}', '') + continue + policy_id_orig = policy_info['policies'][policy_id_current]['policy_id'] + if not isinstance(policy_id_orig, int): + self._print('Cannot determine orig policy_id={policy_id_orig}', '') + continue + index = 0 + if num_supported >= 2: + if policy_id_current != 0: + index = 1 + policy_id = policy_info['policies'][index]['policy_id'] + if not isinstance(policy_id, int): + self._print('Cannot determine policy_id={policy_id}', '') + continue except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + continue - - - def test_set_power_cap(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_power_cap as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO cap = 0 - cap = 0 - for i in range(0, len(processors)): + # Set policy + msg1 = f'{msg} policy_id({policy_id}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_power_cap(processors[i], 0, cap) - self._print(msg, ret) + amdsmi.amdsmi_set_xgmi_plpd(gpu, policy_id) + 
self._print(msg1, '') except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e - - - def test_set_soc_pstate(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_soc_pstate as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO policy_id = 0 - policy_id = 0 - for i in range(0, len(processors)): + # Set back to original policy try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_soc_pstate(processors[i], policy_id) - self._print(msg, ret) + msg1 = f'{msg} policy_id({policy_id_orig}):' + amdsmi.amdsmi_set_xgmi_plpd(gpu, policy_id_orig) except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - - - def test_set_xgmi_plpd(self): - if self.TODO_SKIP_NOT_COMPLETE: - self.skipTest("Skipping test_set_xgmi_plpd as it is not complete.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - # TODO policy_id = 0 - policy_id = 0 - for i in range(0, len(processors)): - try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_set_xgmi_plpd(processors[i], policy_id) - self._print(msg, ret) - except amdsmi.AmdSmiLibraryException as e: - if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + if self._check_ret(msg1, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_status_code_to_string(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_status_code_to_string as it fails.") - raise_exception 
= None - - # TODO status = 0 - status = 0 - status_code_to_string = amdsmi.amdsmi_status_code_to_string(status) - self._print(f' {status} status_code_to_string: {status_code_to_string}') - if raise_exception: - raise raise_exception - - - - def test_stop_gpu_event_notification(self): - if self.TODO_SKIP_NYI: - self.skipTest("Skipping test_stop_gpu_event_notification as it is not implemented yet.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self.skipTest("Skipping test_status_code_to_string as it fails (Unhashable type).") + for error_num, error_name in error_map.items(): + msg = f'{error_name}({error_num}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_stop_gpu_event_notification(processors[i]) - self._print(msg, ret) + ret = amdsmi.amdsmi_status_code_to_string(ctypes.c_uint32(int(error_num))) + self._print(f'{msg} {ret}') except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_topo_get_link_type(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for j in range(0, len(processors)): + self._print_func_name('') + for i, gpu_i in enumerate(self.processors): + for j, gpu_j in enumerate(self.processors): + msg = f'gpu({i},{j}):' try: - msg = f'gpu({i},{j}): ' - ret = amdsmi.amdsmi_topo_get_link_type(processors[i], processors[j]) + ret = amdsmi.amdsmi_topo_get_link_type(gpu_i, gpu_j) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise 
raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_topo_get_link_weight(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for j in range(0, len(processors)): + self._print_func_name('') + for i, gpu_i in enumerate(self.processors): + for j, gpu_j in enumerate(self.processors): + msg = f'gpu({i},{j}):' try: - msg = f'gpu({i},{j}): ' - ret = amdsmi.amdsmi_topo_get_link_weight(processors[i], processors[j]) + ret = amdsmi.amdsmi_topo_get_link_weight(gpu_i, gpu_j) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_topo_get_numa_node_number(self): - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' try: - msg = f'gpu({i}): ' - ret = amdsmi.amdsmi_topo_get_numa_node_number(processors[i]) + ret = amdsmi.amdsmi_topo_get_numa_node_number(gpu) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception - - + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return def test_topo_get_p2p_status(self): + self._print_func_name('') if self.TODO_SKIP_FAIL: - self.skipTest("Skipping test_topo_get_p2p_status as it fails.") - raise_exception = None - - processors = amdsmi.amdsmi_get_processor_handles() - self.assertGreaterEqual(len(processors), 
1) - self.assertLessEqual(len(processors), 32) - for i in range(0, len(processors)): - for j in range(0, len(processors)): + self.skipTest("Skipping test_topo_get_p2p_status as it fails (Inval parameters).") + for i, gpu_i in enumerate(self.processors): + for j, gpu_j in enumerate(self.processors): + msg = f'gpu({i},{j}):' try: - msg = f'gpu({i},{j}): ' - ret = amdsmi.amdsmi_topo_get_p2p_status(processors[i], processors[j]) + ret = amdsmi.amdsmi_topo_get_p2p_status(gpu_i, gpu_j) self._print(msg, ret) except amdsmi.AmdSmiLibraryException as e: if self._check_ret(msg, e, self.PASS): - raise_exception = e - if raise_exception: - raise raise_exception + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return -def print_test_ids(suite): - for test in suite: - if isinstance(test, unittest.TestSuite): - print_test_ids(test) - else: - print(" -", test.id()) + def test_get_gpu_busy_percent(self): + self._print_func_name('') + for i, gpu in enumerate(self.processors): + msg = f'gpu({i}):' + try: + ret = amdsmi.amdsmi_get_gpu_busy_percent(gpu) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + if self._check_ret(msg, e, self.PASS): + self.raise_exception = e + if self.raise_exception: + raise self.raise_exception + return if __name__ == '__main__': - import sys - import unittest - import os - - print("AMD SMI Unit Tests") - verbose=1 - if '-q' in sys.argv or '--quiet' in sys.argv: - verbose=0 - elif '-v' in sys.argv or '--verbose' in sys.argv: - verbose=2 - - # If no -k or --keyword argument is given, print all available tests - if not ('-k' in sys.argv or '--keyword' in sys.argv): - loader = unittest.TestLoader() - suite = loader.loadTestsFromModule(sys.modules[__name__]) - print("==============================================================") - print("Available tests:") - print_test_ids(suite) - - # Provide Legend for test results, otherwise it is not clear what the output means - 
print("==============================================================") - print("Legend: . = pass, s = skipped, F = fail, E = error") - print("==============================================================") - print("Running tests...\n") - - # Detect if ran without sudo or root privileges - if os.geteuid() != 0: - print("Warning: Some tests may require elevated privileges (sudo/root) to run completely.\n") - print("Please relaunch with elevated privileges.\n") - sys.exit(1) - - runner = unittest.TextTestRunner(verbosity=verbose) - unittest.main(testRunner=runner) + unittest.main()