SWDEV-361376 - Added API calls to the python interface
- Added Physical State Query API calls
- Added Clock, Power and Performance Query API calls
- README for both additions
Change-Id: Icf412364a13c9e51b9630f19f29a7cdfbe46f7fe
Signed-off-by: Dalibor Stanisavljevic <Dalibor.Stanisavljevic@amd.com>
[ROCm/amdsmi commit: 62a69fb802]
Этот коммит содержится в:
@@ -1506,3 +1506,490 @@ try:
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_fan_rpms_get
|
||||
Description: Get the fan speed in RPMs of the device with the specified device
|
||||
handle and 0-based sensor index.
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has
|
||||
more than one sensor, it could be greater than 0.
|
||||
|
||||
Output: Fan speed in rpms as integer
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_fan_rpms_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
fan_rpm = amdsmi_dev_fan_rpms_get(device, 0)
|
||||
print(fan_rpm)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_fan_speed_get
|
||||
Description: Get the fan speed for the specified device as a value relative to
|
||||
AMDSMI_MAX_FAN_SPEED
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has
|
||||
more than one sensor, it could be greater than 0.
|
||||
|
||||
Output: Fan speed as integer, relative to AMDSMI_MAX_FAN_SPEED
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_fan_speed_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
fan_speed = amdsmi_dev_fan_speed_get(device, 0)
|
||||
print(fan_speed)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_fan_speed_max_get
|
||||
Description: Get the max fan speed of the device with provided device handle
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has
|
||||
more than one sensor, it could be greater than 0.
|
||||
|
||||
Output: Max fan speed as integer
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_fan_speed_max_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
max_fan_speed = amdsmi_dev_fan_speed_max_get(device, 0)
|
||||
print(max_fan_speed)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_temp_metric_get
|
||||
Description: Get the temperature metric value for the specified metric, from the
|
||||
specified temperature sensor on the specified device
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_type` part of device from which temperature should be obtained
|
||||
* `metric` enum indicating which temperature value should be retrieved
|
||||
|
||||
Output: Temperature as integer in millidegrees Celsius
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_temp_metric_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
temp_metric = amdsmi_dev_temp_metric_get(device, AmdSmiTemperatureType.EDGE,
|
||||
AmdSmiTemperatureMetric.CURRENT)
|
||||
print(temp_metric)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_volt_metric_get
|
||||
Description: Get the voltage metric value for the specified metric, from the
|
||||
specified voltage sensor on the specified device
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_type` part of device from which voltage should be obtained
|
||||
* `metric` enum indicating which voltage value should be retrieved
|
||||
|
||||
Output: Voltage as integer in millivolts
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_volt_metric_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
voltage = amdsmi_dev_volt_metric_get(device, AmdSmiVoltageType.VDDGFX,
|
||||
AmdSmiVoltageMetric.AVERAGE)
|
||||
print(voltage)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_busy_percent_get
|
||||
Description: Get percentage of time device is busy doing any processing
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
|
||||
Output: How busy the device is (as percentage of time)
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_busy_percent_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
busy = amdsmi_dev_busy_percent_get(device)
|
||||
print(busy)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_utilization_count_get
|
||||
Description: Get coarse grain utilization counter of the specified device
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `counter_types` variable number of counter types desired
|
||||
|
||||
Output: List containing dictionaries with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`timestamp` | The timestamp when the counter is retrieved - Resolution: 1 ns
|
||||
`Dictionary for each counter` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`type`</td><td>Type of utilization counter</td></tr><tr><td>`value`</td><td>Value gotten for utilization counter</td></tr></tbody></table>
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_utilization_count_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
utilization = amdsmi_utilization_count_get(
|
||||
device,
|
||||
AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY
|
||||
)
|
||||
print(utilization)
|
||||
utilization = amdsmi_utilization_count_get(
|
||||
device,
|
||||
AmdSmiUtilizationCounterType.COARSE_GRAIN_GFX_ACTIVITY,
|
||||
AmdSmiUtilizationCounterType.COARSE_GRAIN_MEM_ACTIVITY
|
||||
)
|
||||
print(utilization)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_perf_level_get
|
||||
Description: Get the performance level of the device with provided device handle
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
|
||||
Output: Performance level as the string name of the corresponding amdsmi_dev_perf_level_t enum value
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_perf_level_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
perf_level = amdsmi_dev_perf_level_get(device)
|
||||
print(perf_level)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_perf_determinism_mode_set
|
||||
Description: Enter performance determinism mode with provided device handle
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `clkvalue` softmax value for GFXCLK in MHz
|
||||
|
||||
Output: None
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_perf_determinism_mode_set` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_perf_determinism_mode_set(device, 1333)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_overdrive_level_get
|
||||
Description: Get the overdrive percent associated with the device with provided
|
||||
device handle
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
|
||||
Output: Overdrive percentage as integer
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_overdrive_level_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
od_level = amdsmi_dev_overdrive_level_get(device)
|
||||
print(od_level)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_gpu_clk_freq_get
|
||||
Description: Get the list of possible system clock speeds of device for a
|
||||
specified clock type
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `clk_type` the type of clock for which the frequency is desired
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`num_supported`| The number of supported frequencies
|
||||
`current`| The current frequency index
|
||||
`frequency`| List of frequencies, only the first num_supported frequencies are valid
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_gpu_clk_freq_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_gpu_clk_freq_get(device, AmdSmiClockType.SYS)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_od_volt_info_get
|
||||
Description: This function retrieves the voltage/frequency curve information
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`curr_sclk_range` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound sclk range</td></tr><tr><td>`upper_bound`</td><td>upper bound sclk range</td></tr></tbody></table>
|
||||
`curr_mclk_range` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound mclk range</td></tr><tr><td>`upper_bound`</td><td>upper bound mclk range</td></tr></tbody></table>
|
||||
`sclk_freq_limits` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound sclk range limit</td></tr><tr><td>`upper_bound`</td><td>upper bound sclk range limit</td></tr></tbody></table>
|
||||
`mclk_freq_limits` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound mclk range limit</td></tr><tr><td>`upper_bound`</td><td>upper bound mclk range limit</td></tr></tbody></table>
|
||||
`curve.vc_points`| List of the voltage/frequency curve points
|
||||
`num_regions`| The number of freq volt regions
|
||||
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_od_volt_info_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_od_volt_info_get(device)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_gpu_metrics_info_get
|
||||
Description: This function retrieves the gpu metrics information
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`temperature_edge` | edge temperature value
|
||||
`temperature_hotspot` | hotspot temperature value
|
||||
`temperature_mem` | memory temperature value
|
||||
`temperature_vrgfx` | vrgfx temperature value
|
||||
`temperature_vrsoc` | vrsoc temperature value
|
||||
`temperature_vrmem` | vrmem temperature value
|
||||
`average_gfx_activity` | average gfx activity
|
||||
`average_umc_activity` | average umc activity
|
||||
`average_mm_activity` | average mm activity
|
||||
`average_socket_power` | average socket power
|
||||
`energy_accumulator` | energy accumulator value
|
||||
`system_clock_counter` | system clock counter
|
||||
`average_gfxclk_frequency` | average gfx clock frequency
|
||||
`average_socclk_frequency` | average soc clock frequency
|
||||
`average_uclk_frequency` | average uclk frequency
|
||||
`average_vclk0_frequency` | average vclk0 frequency
|
||||
`average_dclk0_frequency` | average dclk0 frequency
|
||||
`average_vclk1_frequency` | average vclk1 frequency
|
||||
`average_dclk1_frequency` | average dclk1 frequency
|
||||
`current_gfxclk` | current gfx clock
|
||||
`current_socclk` | current soc clock
|
||||
`current_uclk` | current uclk
|
||||
`current_vclk0` | current vclk0
|
||||
`current_dclk0` | current dclk0
|
||||
`current_vclk1` | current vclk1
|
||||
`current_dclk1` | current dclk1
|
||||
`throttle_status` | current throttle status
|
||||
`current_fan_speed` | current fan speed
|
||||
`pcie_link_width` | pcie link width
|
||||
`pcie_link_speed` | pcie link speed
|
||||
`padding` | padding
|
||||
`gfx_activity_acc` | gfx activity acc
|
||||
`mem_actvity_acc` | mem activity acc
|
||||
`temperature_hbm` | hbm temperature
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_gpu_metrics_info_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_gpu_metrics_info_get(device)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_od_volt_curve_regions_get
|
||||
Description: This function will retrieve the current valid regions in the
|
||||
frequency/voltage space
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `num_regions` number of freq volt regions
|
||||
|
||||
Output: List containing a dictionary with fields for each freq volt region
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`freq_range` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound freq range</td></tr><tr><td>`upper_bound`</td><td>upper bound freq range</td></tr></tbody></table>
|
||||
`volt_range` | <table> <thead><tr><th> Subfield </th><th>Description</th></tr></thead><tbody><tr><td>`lower_bound`</td><td>lower bound volt range</td></tr><tr><td>`upper_bound`</td><td>upper bound volt range</td></tr></tbody></table>
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_od_volt_curve_regions_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_od_volt_curve_regions_get(device, 3)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
## amdsmi_dev_power_profile_presets_get
|
||||
Description: Get the list of available preset power profiles and an indication of
|
||||
which profile is currently active
|
||||
|
||||
Input parameters:
|
||||
* `device_handle` handle for the given device
|
||||
* `sensor_idx` a 0-based sensor index. Normally, this will be 0. If a device has more than one sensor, it could be greater than 0.
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`available_profiles`| Which profiles are supported by this system
|
||||
`current`| Which power profile is currently active
|
||||
`num_profiles`| How many power profiles are available
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_power_profile_presets_get` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_device_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_power_profile_presets_get(device, 0)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
@@ -87,6 +87,24 @@ from .amdsmi_interface import amdsmi_dev_od_clk_info_set
|
||||
from .amdsmi_interface import amdsmi_dev_od_volt_info_set
|
||||
from .amdsmi_interface import amdsmi_dev_perf_level_set_v1
|
||||
|
||||
# # Physical State Queries
|
||||
from .amdsmi_interface import amdsmi_dev_fan_rpms_get
|
||||
from .amdsmi_interface import amdsmi_dev_fan_speed_get
|
||||
from .amdsmi_interface import amdsmi_dev_fan_speed_max_get
|
||||
from .amdsmi_interface import amdsmi_dev_temp_metric_get
|
||||
from .amdsmi_interface import amdsmi_dev_volt_metric_get
|
||||
|
||||
# # Clock, Power and Performance Query
|
||||
from .amdsmi_interface import amdsmi_dev_busy_percent_get
|
||||
from .amdsmi_interface import amdsmi_utilization_count_get
|
||||
from .amdsmi_interface import amdsmi_dev_perf_level_get
|
||||
from .amdsmi_interface import amdsmi_perf_determinism_mode_set
|
||||
from .amdsmi_interface import amdsmi_dev_overdrive_level_get
|
||||
from .amdsmi_interface import amdsmi_dev_gpu_clk_freq_get
|
||||
from .amdsmi_interface import amdsmi_dev_od_volt_info_get
|
||||
from .amdsmi_interface import amdsmi_dev_gpu_metrics_info_get
|
||||
from .amdsmi_interface import amdsmi_dev_od_volt_curve_regions_get
|
||||
from .amdsmi_interface import amdsmi_dev_power_profile_presets_get
|
||||
|
||||
# # Events
|
||||
from .amdsmi_interface import AmdSmiEventReader
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
import ctypes
|
||||
from typing import Union, Any, Dict, List, Tuple
|
||||
from enum import IntEnum
|
||||
from collections.abc import Iterable
|
||||
|
||||
from . import amdsmi_wrapper
|
||||
from .amdsmi_exception import *
|
||||
@@ -292,43 +291,56 @@ class AmdSmiUtilizationCounterType(IntEnum):
|
||||
|
||||
|
||||
class AmdSmiEventReader:
|
||||
def __init__(self, device_handle: amdsmi_wrapper.amdsmi_device_handle, *event_types):
|
||||
def __init__(
|
||||
self, device_handle: amdsmi_wrapper.amdsmi_device_handle, *event_types
|
||||
):
|
||||
if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
|
||||
raise AmdSmiParameterException(
|
||||
device_handle, amdsmi_wrapper.amdsmi_device_handle
|
||||
)
|
||||
)
|
||||
if not isinstance(event_types, Iterable):
|
||||
raise AmdSmiParameterException(
|
||||
event_types, Iterable
|
||||
)
|
||||
raise AmdSmiParameterException(event_types, Iterable)
|
||||
|
||||
for event_type in event_types:
|
||||
if not isinstance(event_type, AmdSmiEvtNotificationType):
|
||||
raise AmdSmiParameterException(
|
||||
event_type, AmdSmiEvtNotificationType
|
||||
)
|
||||
raise AmdSmiParameterException(event_type, AmdSmiEvtNotificationType)
|
||||
|
||||
self.device_handle = device_handle
|
||||
mask = 0
|
||||
for event_type in event_types:
|
||||
mask |= (1 << (int(event_type) - 1))
|
||||
mask |= 1 << (int(event_type) - 1)
|
||||
|
||||
_check_res(amdsmi_wrapper.amdsmi_event_notification_init(device_handle))
|
||||
_check_res(amdsmi_wrapper.amdsmi_event_notification_mask_set(device_handle, ctypes.c_uint64(mask)))
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_event_notification_mask_set(
|
||||
device_handle, ctypes.c_uint64(mask)
|
||||
)
|
||||
)
|
||||
|
||||
def read(self, timestamp, num_elem = 10):
|
||||
def read(self, timestamp, num_elem=10):
|
||||
self.event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)()
|
||||
_check_res(amdsmi_wrapper.amdsmi_event_notification_get(ctypes.c_int(timestamp), ctypes.byref(
|
||||
ctypes.c_uint32(num_elem)), self.event_info))
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_event_notification_get(
|
||||
ctypes.c_int(timestamp),
|
||||
ctypes.byref(ctypes.c_uint32(num_elem)),
|
||||
self.event_info,
|
||||
)
|
||||
)
|
||||
|
||||
ret = list()
|
||||
for i in range(0, num_elem):
|
||||
if self.event_info[i].event in set(event.value for event in AmdSmiEvtNotificationType):
|
||||
ret.append({
|
||||
'device_handle' : self.event_info[i].device_handle,
|
||||
'event': AmdSmiEvtNotificationType(self.event_info[i].event).name,
|
||||
'message': self.event_info[i].message.decode("utf-8")
|
||||
})
|
||||
if self.event_info[i].event in set(
|
||||
event.value for event in AmdSmiEvtNotificationType
|
||||
):
|
||||
ret.append(
|
||||
{
|
||||
"device_handle": self.event_info[i].device_handle,
|
||||
"event": AmdSmiEvtNotificationType(
|
||||
self.event_info[i].event
|
||||
).name,
|
||||
"message": self.event_info[i].message.decode("utf-8"),
|
||||
}
|
||||
)
|
||||
|
||||
return ret
|
||||
|
||||
@@ -1341,3 +1353,401 @@ def amdsmi_dev_perf_level_set_v1(
|
||||
raise AmdSmiParameterException(perf_lvl, AmdSmiDevPerfLevel)
|
||||
|
||||
_check_res(amdsmi_wrapper.amdsmi_dev_perf_level_set_v1(device_handle, perf_lvl))
|
||||
|
||||
|
||||
def amdsmi_dev_fan_rpms_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int
) -> int:
    """Query the fan speed (documented in the README as RPMs) for one sensor.

    Args:
        device_handle: Handle of the device to query.
        sensor_idx: 0-based index of the fan sensor.

    Returns:
        The integer value the library wrote into the output argument.

    Raises:
        AmdSmiParameterException: If ``device_handle`` or ``sensor_idx`` has
            the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    # Output slot filled in by the native call.
    rpms = amdsmi_wrapper.c_int64()
    status = amdsmi_wrapper.amdsmi_dev_fan_rpms_get(
        device_handle, sensor_idx, ctypes.byref(rpms)
    )
    _check_res(status)

    return rpms.value
|
||||
|
||||
|
||||
def amdsmi_dev_fan_speed_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int
) -> int:
    """Query the fan speed of one sensor.

    The README describes the result as a value relative to
    AMDSMI_MAX_FAN_SPEED.

    Args:
        device_handle: Handle of the device to query.
        sensor_idx: 0-based index of the fan sensor.

    Returns:
        The integer value the library wrote into the output argument.

    Raises:
        AmdSmiParameterException: If ``device_handle`` or ``sensor_idx`` has
            the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    # Output slot filled in by the native call.
    speed = amdsmi_wrapper.c_int64()
    status = amdsmi_wrapper.amdsmi_dev_fan_speed_get(
        device_handle, sensor_idx, ctypes.byref(speed)
    )
    _check_res(status)

    return speed.value
|
||||
|
||||
|
||||
def amdsmi_dev_fan_speed_max_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int
) -> int:
    """Query the maximum fan speed of one sensor.

    Args:
        device_handle: Handle of the device to query.
        sensor_idx: 0-based index of the fan sensor.

    Returns:
        The integer value the library wrote into the (unsigned 64-bit)
        output argument.

    Raises:
        AmdSmiParameterException: If ``device_handle`` or ``sensor_idx`` has
            the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    # Unlike the other fan queries, this output slot is unsigned.
    max_speed = amdsmi_wrapper.c_uint64()
    status = amdsmi_wrapper.amdsmi_dev_fan_speed_max_get(
        device_handle, sensor_idx, ctypes.byref(max_speed)
    )
    _check_res(status)

    return max_speed.value
|
||||
|
||||
|
||||
def amdsmi_dev_temp_metric_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
    sensor_type: AmdSmiTemperatureType,
    metric: AmdSmiTemperatureMetric,
) -> int:
    """Query one temperature metric from one temperature sensor of a device.

    Args:
        device_handle: Handle of the device to query.
        sensor_type: Which temperature sensor to read.
        metric: Which temperature metric to retrieve.

    Returns:
        The integer value the library wrote into the output argument
        (documented in the README as millidegrees Celsius).

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_type, AmdSmiTemperatureType):
        raise AmdSmiParameterException(sensor_type, AmdSmiTemperatureType)
    if not isinstance(metric, AmdSmiTemperatureMetric):
        raise AmdSmiParameterException(metric, AmdSmiTemperatureMetric)

    # Output slot filled in by the native call.
    temperature = amdsmi_wrapper.c_int64()
    status = amdsmi_wrapper.amdsmi_dev_temp_metric_get(
        device_handle, sensor_type, metric, ctypes.byref(temperature)
    )
    _check_res(status)

    return temperature.value
|
||||
|
||||
|
||||
def amdsmi_dev_volt_metric_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
    sensor_type: AmdSmiVoltageType,
    metric: AmdSmiVoltageMetric,
) -> int:
    """Query one voltage metric from one voltage sensor of a device.

    Args:
        device_handle: Handle of the device to query.
        sensor_type: Which voltage sensor to read.
        metric: Which voltage metric to retrieve.

    Returns:
        The integer value the library wrote into the output argument
        (documented in the README as millivolts).

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_type, AmdSmiVoltageType):
        raise AmdSmiParameterException(sensor_type, AmdSmiVoltageType)
    if not isinstance(metric, AmdSmiVoltageMetric):
        raise AmdSmiParameterException(metric, AmdSmiVoltageMetric)

    # Output slot filled in by the native call.
    millivolts = amdsmi_wrapper.c_int64()
    status = amdsmi_wrapper.amdsmi_dev_volt_metric_get(
        device_handle, sensor_type, metric, ctypes.byref(millivolts)
    )
    _check_res(status)

    return millivolts.value
|
||||
|
||||
|
||||
def amdsmi_dev_busy_percent_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> int:
    """Query how busy a device is.

    The README describes the result as the percentage of time the device is
    busy doing any processing.

    Args:
        device_handle: Handle of the device to query.

    Returns:
        The integer value the library wrote into the output argument.

    Raises:
        AmdSmiParameterException: If ``device_handle`` has the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)

    # Output slot filled in by the native call.
    percent = amdsmi_wrapper.c_uint32()
    status = amdsmi_wrapper.amdsmi_dev_busy_percent_get(
        device_handle, ctypes.byref(percent)
    )
    _check_res(status)

    return percent.value
|
||||
|
||||
|
||||
def amdsmi_utilization_count_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
    *counter_types: AmdSmiUtilizationCounterType
) -> List[Dict[str, Any]]:
    """Query one or more utilization counters of a device.

    Args:
        device_handle: Handle of the device to query.
        *counter_types: One or more counter types to retrieve. The annotation
            names the type of each individual argument, as required for
            variadic parameters.

    Returns:
        A list whose first element is ``{"timestamp": <int>}``, followed by
        one ``{"type": <name>, "value": <int>}`` dictionary per requested
        counter, in the order the counters were requested.

    Raises:
        AmdSmiParameterException: If ``device_handle`` or any counter type
            has the wrong type.
        AmdSmiLibraryException: With ``AMDSMI_STATUS_INVAL`` when no counter
            type is given, or ``AMDSMI_STATUS_API_FAILED`` when the counter
            count no longer matches the request after the library call.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
        raise AmdSmiParameterException(
            device_handle, amdsmi_wrapper.amdsmi_device_handle
        )
    if not counter_types:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    # Validate every requested type and pre-fill one native struct per counter.
    counters = []
    for counter_type in counter_types:
        if not isinstance(counter_type, AmdSmiUtilizationCounterType):
            raise AmdSmiParameterException(counter_type, AmdSmiUtilizationCounterType)
        counter = amdsmi_wrapper.amdsmi_utilization_counter_t()
        counter.type = counter_type
        counters.append(counter)

    count = amdsmi_wrapper.c_uint32(len(counters))
    timestamp = amdsmi_wrapper.c_uint64()
    util_counter_list = (amdsmi_wrapper.amdsmi_utilization_counter_t * len(counters))(
        *counters
    )

    _check_res(
        amdsmi_wrapper.amdsmi_utilization_count_get(
            device_handle, util_counter_list, count, ctypes.byref(timestamp)
        )
    )
    if count.value != len(counters):
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_API_FAILED)

    result = [{"timestamp": timestamp.value}]
    for idx in range(count.value):
        counter_type = amdsmi_wrapper.c__EA_AMDSMI_UTILIZATION_COUNTER_TYPE__enumvalues[
            util_counter_list[idx].type
        ]
        # The name table reports the "LAST" sentinel for this value; report the
        # concrete counter name instead.
        if counter_type == "AMDSMI_UTILIZATION_COUNTER_LAST":
            counter_type = "AMDSMI_COARSE_GRAIN_MEM_ACTIVITY"
        result.append({"type": counter_type, "value": util_counter_list[idx].value})

    return result
|
||||
|
||||
|
||||
def amdsmi_dev_perf_level_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> str:
    """Query the performance level of a device and return its name.

    Args:
        device_handle: Handle of the device to query.

    Returns:
        The name looked up for the reported level in the wrapper's
        perf-level name table, with the ``..._FIRST`` and ``..._LAST``
        sentinel names replaced by ``AMDSMI_DEV_PERF_LEVEL_AUTO`` and
        ``AMDSMI_DEV_PERF_LEVEL_DETERMINISM`` respectively.

    Raises:
        AmdSmiParameterException: If ``device_handle`` has the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)

    level = amdsmi_wrapper.amdsmi_dev_perf_level_t()
    status = amdsmi_wrapper.amdsmi_dev_perf_level_get(
        device_handle, ctypes.byref(level)
    )
    _check_res(status)

    level_name = amdsmi_wrapper.c__EA_amdsmi_dev_perf_level_t__enumvalues[level.value]

    # Sentinel names are mapped to the concrete level names.
    sentinel_aliases = {
        "AMDSMI_DEV_PERF_LEVEL_FIRST": "AMDSMI_DEV_PERF_LEVEL_AUTO",
        "AMDSMI_DEV_PERF_LEVEL_LAST": "AMDSMI_DEV_PERF_LEVEL_DETERMINISM",
    }
    return sentinel_aliases.get(level_name, level_name)
|
||||
|
||||
|
||||
def amdsmi_perf_determinism_mode_set(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, clkvalue: int
) -> None:
    """Request performance determinism mode on a device.

    Args:
        device_handle: Handle of the device to configure.
        clkvalue: Value forwarded to the library (documented in the README as
            the softmax value for GFXCLK in MHz).

    Raises:
        AmdSmiParameterException: If ``device_handle`` or ``clkvalue`` has
            the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(clkvalue, int):
        raise AmdSmiParameterException(clkvalue, int)

    status = amdsmi_wrapper.amdsmi_perf_determinism_mode_set(device_handle, clkvalue)
    _check_res(status)
|
||||
|
||||
|
||||
def amdsmi_dev_overdrive_level_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> int:
    """Query the overdrive level of a device.

    The README describes the result as the overdrive percentage.

    Args:
        device_handle: Handle of the device to query.

    Returns:
        The integer value the library wrote into the output argument.

    Raises:
        AmdSmiParameterException: If ``device_handle`` has the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)

    # Output slot filled in by the native call.
    overdrive = amdsmi_wrapper.c_uint32()
    status = amdsmi_wrapper.amdsmi_dev_overdrive_level_get(
        device_handle, ctypes.byref(overdrive)
    )
    _check_res(status)

    return overdrive.value
|
||||
|
||||
|
||||
def amdsmi_dev_gpu_clk_freq_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, clk_type: AmdSmiClockType
) -> Dict[str, Any]:
    """Query the supported frequencies of one clock of a device.

    Args:
        device_handle: Handle of the device to query.
        clk_type: The clock whose frequencies are requested.

    Returns:
        A dictionary with:
            ``num_supported``: number of supported frequencies,
            ``current``: index of the current frequency,
            ``frequency``: the first ``num_supported`` entries of the
            library's frequency array (the only valid ones).

    Raises:
        AmdSmiParameterException: If ``device_handle`` or ``clk_type`` has
            the wrong type.

    The library status code is passed to ``_check_res`` (defined elsewhere),
    which is expected to raise on failure.
    """
    if not isinstance(device_handle, amdsmi_wrapper.amdsmi_device_handle):
        raise AmdSmiParameterException(
            device_handle, amdsmi_wrapper.amdsmi_device_handle
        )
    if not isinstance(clk_type, AmdSmiClockType):
        raise AmdSmiParameterException(clk_type, AmdSmiClockType)

    freq = amdsmi_wrapper.amdsmi_frequencies_t()
    _check_res(
        amdsmi_wrapper.amdsmi_dev_gpu_clk_freq_get(
            device_handle, clk_type, ctypes.byref(freq)
        )
    )

    return {
        "num_supported": freq.num_supported,
        "current": freq.current,
        # Keep exactly the first num_supported entries. Slicing to
        # num_supported - 1 would drop the last valid frequency and, for
        # num_supported == 0, turn into [:-1] and return unused array slots.
        "frequency": list(freq.frequency)[: freq.num_supported],
    }
|
||||
|
||||
|
||||
def amdsmi_dev_od_volt_info_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> Dict[str, Any]:
    """Fetch the overdrive voltage/frequency data of a device as a dict.

    An ``amdsmi_od_volt_freq_data_t`` structure is filled in by
    ``amdsmi_wrapper.amdsmi_dev_od_volt_info_get`` and flattened into a
    dictionary.

    Args:
        device_handle: Handle of the device to query.

    Returns:
        A dictionary whose keys ``"curr_sclk_range"``,
        ``"curr_mclk_range"``, ``"sclk_freq_limits"`` and
        ``"mclk_freq_limits"`` each map to a
        ``{"lower_bound": ..., "upper_bound": ...}`` dictionary, plus
        ``"curve.vc_points"`` (the structure's ``curve.vc_points`` array
        converted to a list) and ``"num_regions"``.

    Raises:
        AmdSmiParameterException: If ``device_handle`` is not an
            ``amdsmi_wrapper.amdsmi_device_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)

    volt_data = amdsmi_wrapper.amdsmi_od_volt_freq_data_t()
    status = amdsmi_wrapper.amdsmi_dev_od_volt_info_get(
        device_handle, ctypes.byref(volt_data)
    )
    _check_res(status)

    # The four range members share the same lower/upper layout, so they are
    # copied in a loop; the tuple order fixes the key order of the result.
    range_fields = (
        "curr_sclk_range",
        "curr_mclk_range",
        "sclk_freq_limits",
        "mclk_freq_limits",
    )
    info: Dict[str, Any] = {}
    for field in range_fields:
        bounds = getattr(volt_data, field)
        info[field] = {
            "lower_bound": bounds.lower_bound,
            "upper_bound": bounds.upper_bound,
        }

    info["curve.vc_points"] = list(volt_data.curve.vc_points)
    info["num_regions"] = volt_data.num_regions
    return info
|
||||
|
||||
|
||||
def amdsmi_dev_gpu_metrics_info_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle,
) -> Dict[str, Any]:
    """Return the GPU metrics structure of a device as a dictionary.

    An ``amdsmi_gpu_metrics_t`` structure is filled in by
    ``amdsmi_wrapper.amdsmi_dev_gpu_metrics_info_get``; its members are
    then copied into a dictionary keyed by member name.

    Args:
        device_handle: Handle of the device to query.

    Returns:
        A dictionary with one entry per copied structure member. All
        values are taken as-is except ``"temperature_hbm"``, which is
        converted to a list.

    Raises:
        AmdSmiParameterException: If ``device_handle`` is not an
            ``amdsmi_wrapper.amdsmi_device_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)

    metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t()
    status = amdsmi_wrapper.amdsmi_dev_gpu_metrics_info_get(
        device_handle, ctypes.byref(metrics)
    )
    _check_res(status)

    # Members copied verbatim, in the order they appear in the result.
    # The spelling "mem_actvity_acc" is the structure's own member name.
    scalar_fields = (
        "temperature_edge",
        "temperature_hotspot",
        "temperature_mem",
        "temperature_vrgfx",
        "temperature_vrsoc",
        "temperature_vrmem",
        "average_gfx_activity",
        "average_umc_activity",
        "average_mm_activity",
        "average_socket_power",
        "energy_accumulator",
        "system_clock_counter",
        "average_gfxclk_frequency",
        "average_socclk_frequency",
        "average_uclk_frequency",
        "average_vclk0_frequency",
        "average_dclk0_frequency",
        "average_vclk1_frequency",
        "average_dclk1_frequency",
        "current_gfxclk",
        "current_socclk",
        "current_uclk",
        "current_vclk0",
        "current_dclk0",
        "current_vclk1",
        "current_dclk1",
        "throttle_status",
        "current_fan_speed",
        "pcie_link_width",
        "pcie_link_speed",
        "padding",
        "gfx_activity_acc",
        "mem_actvity_acc",
    )
    snapshot: Dict[str, Any] = {
        field: getattr(metrics, field) for field in scalar_fields
    }
    # Array member: exposed as a plain Python list.
    snapshot["temperature_hbm"] = list(metrics.temperature_hbm)
    return snapshot
|
||||
|
||||
|
||||
def amdsmi_dev_od_volt_curve_regions_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, num_regions: int
) -> List[Dict[str, Any]]:
    """Retrieve voltage-curve regions of a device as a list of dicts.

    A buffer of ``num_regions`` ``amdsmi_freq_volt_region_t`` entries and a
    ``c_uint32`` initialised to ``num_regions`` are passed to
    ``amdsmi_wrapper.amdsmi_dev_od_volt_curve_regions_get``. The counter's
    value after the call decides how many buffer entries are returned.

    Args:
        device_handle: Handle of the device to query.
        num_regions: Size of the buffer handed to the library.

    Returns:
        One dictionary per region, each with ``"freq_range"`` and
        ``"volt_range"`` entries holding ``"lower_bound"`` and
        ``"upper_bound"``.

    Raises:
        AmdSmiParameterException: If ``device_handle`` is not an
            ``amdsmi_wrapper.amdsmi_device_handle`` or ``num_regions`` is
            not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(num_regions, int):
        raise AmdSmiParameterException(num_regions, int)

    # In/out counter: starts as the buffer size, read back after the call.
    count = amdsmi_wrapper.c_uint32(num_regions)
    regions = (amdsmi_wrapper.amdsmi_freq_volt_region_t * num_regions)()
    status = amdsmi_wrapper.amdsmi_dev_od_volt_curve_regions_get(
        device_handle, ctypes.byref(count), regions
    )
    _check_res(status)

    def _bounds(rng: Any) -> Dict[str, Any]:
        # Convert one lower/upper range member into a plain dict.
        return {"lower_bound": rng.lower_bound, "upper_bound": rng.upper_bound}

    return [
        {
            "freq_range": _bounds(regions[idx].freq_range),
            "volt_range": _bounds(regions[idx].volt_range),
        }
        for idx in range(count.value)
    ]
|
||||
|
||||
|
||||
def amdsmi_dev_power_profile_presets_get(
    device_handle: amdsmi_wrapper.amdsmi_device_handle, sensor_idx: int
) -> Dict[str, Any]:
    """Return the power-profile status the library reports for a device.

    An ``amdsmi_power_profile_status_t`` structure is filled in by
    ``amdsmi_wrapper.amdsmi_dev_power_profile_presets_get`` and three of
    its members are copied into a dictionary.

    Args:
        device_handle: Handle of the device to query.
        sensor_idx: Integer sensor index forwarded to the library call.

    Returns:
        A dictionary with the keys ``"available_profiles"``, ``"current"``
        and ``"num_profiles"``, holding the structure members of the same
        names.

    Raises:
        AmdSmiParameterException: If ``device_handle`` is not an
            ``amdsmi_wrapper.amdsmi_device_handle`` or ``sensor_idx`` is
            not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_device_handle
    if not isinstance(device_handle, handle_type):
        raise AmdSmiParameterException(device_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    profile_status = amdsmi_wrapper.amdsmi_power_profile_status_t()
    status_code = amdsmi_wrapper.amdsmi_dev_power_profile_presets_get(
        device_handle, sensor_idx, ctypes.byref(profile_status)
    )
    _check_res(status_code)

    presets: Dict[str, Any] = {}
    presets["available_profiles"] = profile_status.available_profiles
    presets["current"] = profile_status.current
    presets["num_profiles"] = profile_status.num_profiles
    return presets
|
||||
|
||||
Ссылка в новой задаче
Block a user