From e09dc2fd47019ee9e7e4f2e1314fb15a3950011e Mon Sep 17 00:00:00 2001 From: Dalibor Stanisavljevic Date: Wed, 9 Nov 2022 17:32:55 +0100 Subject: [PATCH] SWDEV-361376 - Added API calls to the python interface - Added Physical State Query API calls - Added Clock, Power and Performance Query API calls - README for both additions Change-Id: Icf412364a13c9e51b9630f19f29a7cdfbe46f7fe Signed-off-by: Dalibor Stanisavljevic [ROCm/amdsmi commit: 62a69fb802a9db441a1ae89df2be773afa801594] --- projects/amdsmi/py-interface/README.md | 487 ++++++++++++++++++ projects/amdsmi/py-interface/__init__.py | 18 + .../amdsmi/py-interface/amdsmi_interface.py | 450 +++++++++++++++- 3 files changed, 935 insertions(+), 20 deletions(-) diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 846b90b2e5..298721af5e 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -1506,3 +1506,490 @@ try: except AmdSmiException as e: print(e) ``` +## amdsmi_dev_fan_rpms_get +Description: Get the fan speed in RPMs of the device with the specified device +handle and 0-based sensor index. + +Input parameters: +* `device_handle` handle for the given device +* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has +more than one sensor, it could be greater than 0. 
+ +Output: Fan speed in rpms as integer + +Exceptions that can be thrown by `amdsmi_dev_fan_rpms_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + fan_rpm = amdsmi_dev_fan_rpms_get(device, 0) + print(fan_rpm) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_fan_speed_get +Description: Get the fan speed for the specified device as a value relative to +AMDSMI_MAX_FAN_SPEED + +Input parameters: +* `device_handle` handle for the given device +* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has +more than one sensor, it could be greater than 0. + +Output: Fan speed relative to MAX + +Exceptions that can be thrown by `amdsmi_dev_fan_speed_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + fan_speed = amdsmi_dev_fan_speed_get(device, 0) + print(fan_speed) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_fan_speed_max_get +Description: Get the max fan speed of the device with provided device handle + +Input parameters: +* `device_handle` handle for the given device +* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has +more than one sensor, it could be greater than 0. 
+ +Output: Max fan speed as integer + +Exceptions that can be thrown by `amdsmi_dev_fan_speed_max_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + max_fan_speed = amdsmi_dev_fan_speed_max_get(device, 0) + print(max_fan_speed) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_temp_metric_get +Description: Get the temperature metric value for the specified metric, from the +specified temperature sensor on the specified device + +Input parameters: +* `device_handle` handle for the given device +* `sensor_type` part of device from which temperature should be obtained +* `metric` enum indicating which temperature value should be retrieved + +Output: Temperature as integer in millidegrees Celsius + +Exceptions that can be thrown by `amdsmi_dev_temp_metric_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + temp_metric = amdsmi_dev_temp_metric_get(device, AmdSmiTemperatureType.EDGE, + AmdSmiTemperatureMetric.CURRENT) + print(temp_metric) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_volt_metric_get +Description: Get the voltage metric value for the specified metric, from the +specified voltage sensor on the specified device + +Input parameters: +* `device_handle` handle for the given device +* `sensor_type` part of device from which voltage should be obtained +* `metric` enum indicating which voltage value should be retrieved + +Output: Voltage as integer in millivolts + +Exceptions that can be thrown by `amdsmi_dev_volt_metric_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: 
+```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + voltage = amdsmi_dev_volt_metric_get(device, AmdSmiVoltageType.VDDGFX, + AmdSmiVoltageMetric.AVERAGE) + print(voltage) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_busy_percent_get +Description: Get percentage of time device is busy doing any processing + +Input parameters: +* `device_handle` handle for the given device + +Output: How busy the device is (as percentage of time) + +Exceptions that can be thrown by `amdsmi_dev_busy_percent_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + busy = amdsmi_dev_busy_percent_get(device) + print(busy) +except AmdSmiException as e: + print(e) +``` +## amdsmi_utilization_count_get +Description: Get coarse grain utilization counter of the specified device + +Input parameters: +* `device_handle` handle for the given device +* `counter_types` variable number of counter types desired + +Output: List containing dictionaries with fields + +Field | Description +---|--- +`timestamp` | The timestamp when the counter is retrieved - Resolution: 1 ns +`Dictionary for each counter` | 
Subfield Description
`type`Type of utilization counter
`value`Value gotten for utilization counter
+ +Exceptions that can be thrown by `amdsmi_utilization_count_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + utilization = amdsmi_utilization_count_get( + device, + AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY + ) + print(utilization) + utilization = amdsmi_utilization_count_get( + device, + AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY, + AmdSmiUtilizationCounterType.COARSE_GRAIN_MEM_ACTIVITY + ) + print(utilization) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_perf_level_get +Description: Get the performance level of the device with provided device handle + +Input parameters: +* `device_handle` handle for the given device + +Output: Performance level as the string name of the amdsmi_dev_perf_level_t enum value + +Exceptions that can be thrown by `amdsmi_dev_perf_level_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + perf_level = amdsmi_dev_perf_level_get(device) + print(perf_level) +except AmdSmiException as e: + print(e) +``` +## amdsmi_perf_determinism_mode_set +Description: Enter performance determinism mode with provided device handle + +Input parameters: +* `device_handle` handle for the given device +* `clkvalue` softmax value for GFXCLK in MHz + +Output: None + +Exceptions that can be thrown by `amdsmi_perf_determinism_mode_set` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_perf_determinism_mode_set(device, 1333) +except AmdSmiException as 
e: + print(e) +``` +## amdsmi_dev_overdrive_level_get +Description: Get the overdrive percent associated with the device with provided +device handle + +Input parameters: +* `device_handle` handle for the given device + +Output: Overdrive percentage as integer + +Exceptions that can be thrown by `amdsmi_dev_overdrive_level_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + od_level = amdsmi_dev_overdrive_level_get(device) + print(od_level) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_gpu_clk_freq_get +Description: Get the list of possible system clock speeds of device for a +specified clock type + +Input parameters: +* `device_handle` handle for the given device +* `clk_type` the type of clock for which the frequency is desired + +Output: Dictionary with fields + +Field | Description +---|--- +`num_supported`| The number of supported frequencies +`current`| The current frequency index +`frequency`| List of frequencies, only the first num_supported frequencies are valid + +Exceptions that can be thrown by `amdsmi_dev_gpu_clk_freq_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_dev_gpu_clk_freq_get(device, AmdSmiClockType.SYS) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_od_volt_info_get +Description: This function retrieves the voltage/frequency curve information + +Input parameters: +* `device_handle` handle for the given device + +Output: Dictionary with fields + +Field | Description +---|--- +`curr_sclk_range` | 
Subfield Description
`lower_bound`lower bound sclk range
`upper_bound`upper bound sclk range
+`curr_mclk_range` |
Subfield Description
`lower_bound`lower bound mclk range
`upper_bound`upper bound mclk range
+`sclk_freq_limits` |
Subfield Description
`lower_bound`lower bound sclk range limit
`upper_bound`upper bound sclk range limit
+`mclk_freq_limits` |
Subfield Description
`lower_bound`lower bound mclk range limit
`upper_bound`upper bound mclk range limit
+`curve.vc_points`| List of voltage curve points +`num_regions`| The number of voltage curve regions + + +Exceptions that can be thrown by `amdsmi_dev_od_volt_info_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_dev_od_volt_info_get(device) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_gpu_metrics_info_get +Description: This function retrieves the gpu metrics information + +Input parameters: +* `device_handle` handle for the given device + +Output: Dictionary with fields + +Field | Description +---|--- +`temperature_edge` | edge temperature value +`temperature_hotspot` | hotspot temperature value +`temperature_mem` | memory temperature value +`temperature_vrgfx` | vrgfx temperature value +`temperature_vrsoc` | vrsoc temperature value +`temperature_vrmem` | vrmem temperature value +`average_gfx_activity` | average gfx activity +`average_umc_activity` | average umc activity +`average_mm_activity` | average mm activity +`average_socket_power` | average socket power +`energy_accumulator` | energy accumulator value +`system_clock_counter` | system clock counter +`average_gfxclk_frequency` | average gfx clock frequency +`average_socclk_frequency` | average soc clock frequency +`average_uclk_frequency` | average uclk frequency +`average_vclk0_frequency` | average vclk0 frequency +`average_dclk0_frequency` | average dclk0 frequency +`average_vclk1_frequency` | average vclk1 frequency +`average_dclk1_frequency` | average dclk1 frequency +`current_gfxclk` | current gfx clock +`current_socclk` | current soc clock +`current_uclk` | current uclk +`current_vclk0` | current vclk0 +`current_dclk0` | current dclk0 +`current_vclk1` | current vclk1 +`current_dclk1` | current dclk1 +`throttle_status` | current throttle status +`current_fan_speed` | current fan 
speed +`pcie_link_width` | pcie link width +`pcie_link_speed` | pcie link speed +`padding` | padding +`gfx_activity_acc` | gfx activity acc +`mem_actvity_acc` | mem activity acc +`temperature_hbm` | hbm temperature + +Exceptions that can be thrown by `amdsmi_dev_gpu_metrics_info_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_dev_gpu_metrics_info_get(device) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_od_volt_curve_regions_get +Description: This function will retrieve the current valid regions in the +frequency/voltage space + +Input parameters: +* `device_handle` handle for the given device +* `num_regions` number of freq volt regions + +Output: List containing a dictionary with fields for each freq volt region + +Field | Description +---|--- +`freq_range` | 
Subfield Description
`lower_bound`lower bound freq range
`upper_bound`upper bound freq range
+`volt_range` |
Subfield Description
`lower_bound`lower bound volt range
`upper_bound`upper bound volt range
+ +Exceptions that can be thrown by `amdsmi_dev_od_volt_curve_regions_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_dev_od_volt_curve_regions_get(device, 3) +except AmdSmiException as e: + print(e) +``` +## amdsmi_dev_power_profile_presets_get +Description: Get the list of available preset power profiles and an indication of +which profile is currently active + +Input parameters: +* `device_handle` handle for the given device +* `sensor_idx` a 0-based sensor index. Normally, this will be 0 + +Output: Dictionary with fields + +Field | Description +---|--- +`available_profiles`| Which profiles are supported by this system +`current`| Which power profile is currently active +`num_profiles`| How many power profiles are available + +Exceptions that can be thrown by `amdsmi_dev_power_profile_presets_get` function: +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: +```python +try: + devices = amdsmi_get_device_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + amdsmi_dev_power_profile_presets_get(device, 0) +except AmdSmiException as e: + print(e) +``` diff --git a/projects/amdsmi/py-interface/__init__.py b/projects/amdsmi/py-interface/__init__.py index 4f4932782b..2f8d8c62a8 100644 --- a/projects/amdsmi/py-interface/__init__.py +++ b/projects/amdsmi/py-interface/__init__.py @@ -87,6 +87,24 @@ from .amdsmi_interface import amdsmi_dev_od_clk_info_set from .amdsmi_interface import amdsmi_dev_od_volt_info_set from .amdsmi_interface import amdsmi_dev_perf_level_set_v1 +# # Physical State Queries +from .amdsmi_interface import amdsmi_dev_fan_rpms_get +from .amdsmi_interface import amdsmi_dev_fan_speed_get +from .amdsmi_interface import amdsmi_dev_fan_speed_max_get +from .amdsmi_interface import 
amdsmi_dev_temp_metric_get +from .amdsmi_interface import amdsmi_dev_volt_metric_get + +# # Clock, Power and Performance Query +from .amdsmi_interface import amdsmi_dev_busy_percent_get +from .amdsmi_interface import amdsmi_utilization_count_get +from .amdsmi_interface import amdsmi_dev_perf_level_get +from .amdsmi_interface import amdsmi_perf_determinism_mode_set +from .amdsmi_interface import amdsmi_dev_overdrive_level_get +from .amdsmi_interface import amdsmi_dev_gpu_clk_freq_get +from .amdsmi_interface import amdsmi_dev_od_volt_info_get +from .amdsmi_interface import amdsmi_dev_gpu_metrics_info_get +from .amdsmi_interface import amdsmi_dev_od_volt_curve_regions_get +from .amdsmi_interface import amdsmi_dev_power_profile_presets_get # # Events from .amdsmi_interface import AmdSmiEventReader diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index a40559e699..57d4d50bd4 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -22,7 +22,6 @@ import ctypes from typing import Union, Any, Dict, List, Tuple from enum import IntEnum -from collections.abc import Iterable from . 
import amdsmi_wrapper from .amdsmi_exception import * @@ -292,43 +291,56 @@ class AmdSmiUtilizationCounterType(IntEnum): class AmdSmiEventReader: - def __init__(self, device_handle: amdsmi_wrapper.amdsmi_device_handle, *event_types): + def __init__( + self, device_handle: amdsmi_wrapper.amdsmi_device_handle, *event_types + ): if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): raise AmdSmiParameterException( device_handle, amdsmi_wrapper.amdsmi_device_handle - ) + ) if not isinstance(event_types, Iterable): - raise AmdSmiParameterException( - event_types, Iterable - ) + raise AmdSmiParameterException(event_types, Iterable) for event_type in event_types: if not isinstance(event_type, AmdSmiEvtNotificationType): - raise AmdSmiParameterException( - event_type, AmdSmiEvtNotificationType - ) + raise AmdSmiParameterException(event_type, AmdSmiEvtNotificationType) self.device_handle = device_handle mask = 0 for event_type in event_types: - mask |= (1 << (int(event_type) - 1)) + mask |= 1 << (int(event_type) - 1) _check_res(amdsmi_wrapper.amdsmi_event_notification_init(device_handle)) - _check_res(amdsmi_wrapper.amdsmi_event_notification_mask_set(device_handle, ctypes.c_uint64(mask))) + _check_res( + amdsmi_wrapper.amdsmi_event_notification_mask_set( + device_handle, ctypes.c_uint64(mask) + ) + ) - def read(self, timestamp, num_elem = 10): + def read(self, timestamp, num_elem=10): self.event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)() - _check_res(amdsmi_wrapper.amdsmi_event_notification_get(ctypes.c_int(timestamp), ctypes.byref( - ctypes.c_uint32(num_elem)), self.event_info)) + _check_res( + amdsmi_wrapper.amdsmi_event_notification_get( + ctypes.c_int(timestamp), + ctypes.byref(ctypes.c_uint32(num_elem)), + self.event_info, + ) + ) ret = list() for i in range(0, num_elem): - if self.event_info[i].event in set(event.value for event in AmdSmiEvtNotificationType): - ret.append({ - 'device_handle' : self.event_info[i].device_handle, 
- 'event': AmdSmiEvtNotificationType(self.event_info[i].event).name, - 'message': self.event_info[i].message.decode("utf-8") - }) + if self.event_info[i].event in set( + event.value for event in AmdSmiEvtNotificationType + ): + ret.append( + { + "device_handle": self.event_info[i].device_handle, + "event": AmdSmiEvtNotificationType( + self.event_info[i].event + ).name, + "message": self.event_info[i].message.decode("utf-8"), + } + ) return ret @@ -1341,3 +1353,401 @@ def amdsmi_dev_perf_level_set_v1( raise AmdSmiParameterException(perf_lvl, AmdSmiDevPerfLevel) _check_res(amdsmi_wrapper.amdsmi_dev_perf_level_set_v1(device_handle, perf_lvl)) + + +def amdsmi_dev_fan_rpms_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_idx, int): + raise AmdSmiParameterException(sensor_idx, int) + fan_speed = amdsmi_wrapper.c_int64() + _check_res( + amdsmi_wrapper.amdsmi_dev_fan_rpms_get( + device_handle, sensor_idx, ctypes.byref(fan_speed) + ) + ) + + return fan_speed.value + + +def amdsmi_dev_fan_speed_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_idx, int): + raise AmdSmiParameterException(sensor_idx, int) + fan_speed = amdsmi_wrapper.c_int64() + _check_res( + amdsmi_wrapper.amdsmi_dev_fan_speed_get( + device_handle, sensor_idx, ctypes.byref(fan_speed) + ) + ) + + return fan_speed.value + + +def amdsmi_dev_fan_speed_max_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, 
amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_idx, int): + raise AmdSmiParameterException(sensor_idx, int) + fan_speed = amdsmi_wrapper.c_uint64() + _check_res( + amdsmi_wrapper.amdsmi_dev_fan_speed_max_get( + device_handle, sensor_idx, ctypes.byref(fan_speed) + ) + ) + + return fan_speed.value + + +def amdsmi_dev_temp_metric_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, + sensor_type: AmdSmiTemperatureType, + metric: AmdSmiTemperatureMetric, +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_type, AmdSmiTemperatureType): + raise AmdSmiParameterException(sensor_type, AmdSmiTemperatureType) + if not isinstance(metric, AmdSmiTemperatureMetric): + raise AmdSmiParameterException(metric, AmdSmiTemperatureMetric) + + temp_value = amdsmi_wrapper.c_int64() + _check_res( + amdsmi_wrapper.amdsmi_dev_temp_metric_get( + device_handle, sensor_type, metric, ctypes.byref(temp_value) + ) + ) + + return temp_value.value + + +def amdsmi_dev_volt_metric_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, + sensor_type: AmdSmiVoltageType, + metric: AmdSmiVoltageMetric, +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_type, AmdSmiVoltageType): + raise AmdSmiParameterException(sensor_type, AmdSmiVoltageType) + if not isinstance(metric, AmdSmiVoltageMetric): + raise AmdSmiParameterException(metric, AmdSmiVoltageMetric) + + voltage = amdsmi_wrapper.c_int64() + _check_res( + amdsmi_wrapper.amdsmi_dev_volt_metric_get( + device_handle, sensor_type, metric, ctypes.byref(voltage) + ) + ) + + return voltage.value + + +def amdsmi_dev_busy_percent_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, +) -> int: + if not isinstance(device_handle, 
amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + + busy_percent = amdsmi_wrapper.c_uint32() + _check_res( + amdsmi_wrapper.amdsmi_dev_busy_percent_get( + device_handle, ctypes.byref(busy_percent) + ) + ) + + return busy_percent.value + + +def amdsmi_utilization_count_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, + *counter_types: Tuple[AmdSmiUtilizationCounterType] +) -> List[Dict[str, Any]]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not len(counter_types): + raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL) + counters = [] + for counter_type in counter_types: + if not isinstance(counter_type, AmdSmiUtilizationCounterType): + raise AmdSmiParameterException(counter_type, AmdSmiUtilizationCounterType) + counter = amdsmi_wrapper.amdsmi_utilization_counter_t() + counter.type = counter_type + counters.append(counter) + + count = amdsmi_wrapper.c_uint32(len(counters)) + timestamp = amdsmi_wrapper.c_uint64() + util_counter_list = (amdsmi_wrapper.amdsmi_utilization_counter_t * len(counters))( + *counters + ) + + _check_res( + amdsmi_wrapper.amdsmi_utilization_count_get( + device_handle, util_counter_list, count, ctypes.byref(timestamp) + ) + ) + if count.value != len(counters): + raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_API_FAILED) + + result = [{"timestamp": timestamp.value}] + for idx in range(count.value): + counter_type = amdsmi_wrapper.c__EA_AMDSMI_UTILIZATION_COUNTER_TYPE__enumvalues[ + util_counter_list[idx].type + ] + if counter_type == "AMDSMI_UTILIZATION_COUNTER_LAST": + counter_type = "AMDSMI_COARSE_GRAIN_MEM_ACTIVITY" + result.append({"type": counter_type, "value": util_counter_list[idx].value}) + + return result + + +def amdsmi_dev_perf_level_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, +) -> str: 
+ if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + + perf = amdsmi_wrapper.amdsmi_dev_perf_level_t() + + _check_res( + amdsmi_wrapper.amdsmi_dev_perf_level_get(device_handle, ctypes.byref(perf)) + ) + + result = amdsmi_wrapper.c__EA_amdsmi_dev_perf_level_t__enumvalues[perf.value] + if result == "AMDSMI_DEV_PERF_LEVEL_FIRST": + result = "AMDSMI_DEV_PERF_LEVEL_AUTO" + if result == "AMDSMI_DEV_PERF_LEVEL_LAST": + result = "AMDSMI_DEV_PERF_LEVEL_DETERMINISM" + + return result + + +def amdsmi_perf_determinism_mode_set( + device_handle: amdsmi_wrapper.amdsmi_device_handle, clkvalue: int +) -> None: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(clkvalue, int): + raise AmdSmiParameterException(clkvalue, int) + + _check_res(amdsmi_wrapper.amdsmi_perf_determinism_mode_set(device_handle, clkvalue)) + + +def amdsmi_dev_overdrive_level_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, +) -> int: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + + od_level = amdsmi_wrapper.c_uint32() + _check_res( + amdsmi_wrapper.amdsmi_dev_overdrive_level_get( + device_handle, ctypes.byref(od_level) + ) + ) + + return od_level.value + + +def amdsmi_dev_gpu_clk_freq_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, clk_type: AmdSmiClockType +) -> Dict[str, Any]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(clk_type, AmdSmiClockType): + raise AmdSmiParameterException(clk_type, AmdSmiClockType) + + freq = amdsmi_wrapper.amdsmi_frequencies_t() + _check_res( + 
amdsmi_wrapper.amdsmi_dev_gpu_clk_freq_get( + device_handle, clk_type, ctypes.byref(freq) + ) + ) + + return { + "num_supported": freq.num_supported, + "current": freq.current, + "frequency": list(freq.frequency)[: freq.num_supported], + } + + +def amdsmi_dev_od_volt_info_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, +) -> Dict[str, Any]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + + freq_data = amdsmi_wrapper.amdsmi_od_volt_freq_data_t() + _check_res( + amdsmi_wrapper.amdsmi_dev_od_volt_info_get( + device_handle, ctypes.byref(freq_data) + ) + ) + + return { + "curr_sclk_range": { + "lower_bound": freq_data.curr_sclk_range.lower_bound, + "upper_bound": freq_data.curr_sclk_range.upper_bound, + }, + "curr_mclk_range": { + "lower_bound": freq_data.curr_mclk_range.lower_bound, + "upper_bound": freq_data.curr_mclk_range.upper_bound, + }, + "sclk_freq_limits": { + "lower_bound": freq_data.sclk_freq_limits.lower_bound, + "upper_bound": freq_data.sclk_freq_limits.upper_bound, + }, + "mclk_freq_limits": { + "lower_bound": freq_data.mclk_freq_limits.lower_bound, + "upper_bound": freq_data.mclk_freq_limits.upper_bound, + }, + "curve.vc_points": list(freq_data.curve.vc_points), + "num_regions": freq_data.num_regions, + } + + +def amdsmi_dev_gpu_metrics_info_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, +) -> Dict[str, Any]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + + gpu_metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t() + _check_res( + amdsmi_wrapper.amdsmi_dev_gpu_metrics_info_get( + device_handle, ctypes.byref(gpu_metrics) + ) + ) + + return { + "temperature_edge": gpu_metrics.temperature_edge, + "temperature_hotspot": gpu_metrics.temperature_hotspot, + "temperature_mem": gpu_metrics.temperature_mem, + 
"temperature_vrgfx": gpu_metrics.temperature_vrgfx, + "temperature_vrsoc": gpu_metrics.temperature_vrsoc, + "temperature_vrmem": gpu_metrics.temperature_vrmem, + "average_gfx_activity": gpu_metrics.average_gfx_activity, + "average_umc_activity": gpu_metrics.average_umc_activity, + "average_mm_activity": gpu_metrics.average_mm_activity, + "average_socket_power": gpu_metrics.average_socket_power, + "energy_accumulator": gpu_metrics.energy_accumulator, + "system_clock_counter": gpu_metrics.system_clock_counter, + "average_gfxclk_frequency": gpu_metrics.average_gfxclk_frequency, + "average_socclk_frequency": gpu_metrics.average_socclk_frequency, + "average_uclk_frequency": gpu_metrics.average_uclk_frequency, + "average_vclk0_frequency": gpu_metrics.average_vclk0_frequency, + "average_dclk0_frequency": gpu_metrics.average_dclk0_frequency, + "average_vclk1_frequency": gpu_metrics.average_vclk1_frequency, + "average_dclk1_frequency": gpu_metrics.average_dclk1_frequency, + "current_gfxclk": gpu_metrics.current_gfxclk, + "current_socclk": gpu_metrics.current_socclk, + "current_uclk": gpu_metrics.current_uclk, + "current_vclk0": gpu_metrics.current_vclk0, + "current_dclk0": gpu_metrics.current_dclk0, + "current_vclk1": gpu_metrics.current_vclk1, + "current_dclk1": gpu_metrics.current_dclk1, + "throttle_status": gpu_metrics.throttle_status, + "current_fan_speed": gpu_metrics.current_fan_speed, + "pcie_link_width": gpu_metrics.pcie_link_width, + "pcie_link_speed": gpu_metrics.pcie_link_speed, + "padding": gpu_metrics.padding, + "gfx_activity_acc": gpu_metrics.gfx_activity_acc, + "mem_actvity_acc": gpu_metrics.mem_actvity_acc, + "temperature_hbm": list(gpu_metrics.temperature_hbm), + } + + +def amdsmi_dev_od_volt_curve_regions_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, num_regions: int +) -> List[Dict[str, Any]]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, 
amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(num_regions, int): + raise AmdSmiParameterException(num_regions, int) + + region_count = amdsmi_wrapper.c_uint32(num_regions) + buffer = (amdsmi_wrapper.amdsmi_freq_volt_region_t * num_regions)() + _check_res( + amdsmi_wrapper.amdsmi_dev_od_volt_curve_regions_get( + device_handle, ctypes.byref(region_count), buffer + ) + ) + + result = [] + + for idx in range(region_count.value): + result.extend( + [ + { + "freq_range": { + "lower_bound": buffer[idx].freq_range.lower_bound, + "upper_bound": buffer[idx].freq_range.upper_bound, + }, + "volt_range": { + "lower_bound": buffer[idx].volt_range.lower_bound, + "upper_bound": buffer[idx].volt_range.upper_bound, + }, + } + ] + ) + + return result + + +def amdsmi_dev_power_profile_presets_get( + device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int +) -> Dict[str, Any]: + if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle): + raise AmdSmiParameterException( + device_handle, amdsmi_wrapper.amdsmi_device_handle + ) + if not isinstance(sensor_idx, int): + raise AmdSmiParameterException(sensor_idx, int) + + status = amdsmi_wrapper.amdsmi_power_profile_status_t() + _check_res( + amdsmi_wrapper.amdsmi_dev_power_profile_presets_get( + device_handle, sensor_idx, ctypes.byref(status) + ) + ) + + return { + "available_profiles": status.available_profiles, + "current": status.current, + "num_profiles": status.num_profiles, + }