19030e5b72
- Add support for only CPU if only the hsmp driver is present.
- Add support for both AMD GPUs and AMD CPUs if both the amdgpu driver and CPUs are present.
- Add support for socket power metrics
- Add support for hsmp proto type version, prochot status, read current fclkmclk freq
and current cclk freq limit, c0 residency, lclk dpm level range, socket frequency range
- Add CPU socket current frequency limit.
- Update tool for API's IO bandwidth, XGMI bandwidth,
power telemetry rails, APB enable and APB disable API's
- Add support set_pow_limit, set_xgmi_link_width, set_lclk_dpm_level, core_boost_limit,
curr_active_freq_core_limit, set_soc_boost_limit and set_core_boost_limit.
- Add support for the following cpu related API's in tool
core_energy, socket energy, set power efficiency mode, ddr bandwidth,
cpu temperature, dimm temperature range rate, dimm power consumption
and dimm thermal temperature.
- Add support for set_gmi3_link_width, set_pcie_lnk_rate, set_df_pstate_range
Change-Id: I5a35d1cceeb7df0bc8b7116df7c27bb7f376e839
142 строки
5.5 KiB
Python
142 строки
5.5 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# Copyright (C) 2023 Advanced Micro Devices. All rights reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
# this software and associated documentation files (the "Software"), to deal in
|
|
# the Software without restriction, including without limitation the rights to
|
|
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
# the Software, and to permit persons to whom the Software is furnished to do so,
|
|
# subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
#
|
|
|
|
### Handle safe initialization for amdsmi
|
|
|
|
import atexit
|
|
import logging
|
|
import signal
|
|
import sys
|
|
|
|
from pathlib import Path
|
|
|
|
sys.path.append(f"{Path(__file__).resolve().parent}/../../share/amd_smi")
|
|
|
|
# If the python library is installed, it will overwrite the path above
|
|
from amdsmi import amdsmi_interface
|
|
from amdsmi import amdsmi_exception
|
|
|
|
# Using basic python logging for user errors and development
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.ERROR) # User level logging
# This traceback limit only affects this file; once the code hits the CLI portion it gets reset to the user's preference
sys.tracebacklimit = -1 # Disable traceback when raising errors

# On initial import set initialized variable
AMDSMI_INITIALIZED = False
# 4098 == 0x1002; presumably the PCI vendor ID used to recognise AMD devices (not referenced in this file)
AMD_VENDOR_ID = 4098
|
|
|
|
def check_amdgpu_driver():
    """Report whether the amdgpu kernel module is fully initialized.

    Reads ``/sys/module/amdgpu/initstate`` and compares its stripped
    contents with ``"live"``.

    Returns:
        bool: True if the file exists and reads ``"live"``; False if the
            file is absent or holds any other state.
    """
    initstate_file = Path("/sys/module/amdgpu/initstate")
    try:
        # Read directly instead of exists()-then-read: the entry can vanish
        # between the two calls if the module is unloaded in the meantime.
        state = initstate_file.read_text(encoding="ascii")
    except (FileNotFoundError, NotADirectoryError):
        return False
    return state.strip() == "live"
|
|
|
|
|
|
def check_amdhsmp_driver():
    """Report whether the amd_hsmp kernel module is fully initialized.

    Reads ``/sys/module/amd_hsmp/initstate`` and compares its stripped
    contents with ``"live"``.

    Returns:
        bool: True if the file exists and reads ``"live"``; False if the
            file is absent or holds any other state.
    """
    initstate_file = Path("/sys/module/amd_hsmp/initstate")
    try:
        # Read directly instead of exists()-then-read: the entry can vanish
        # between the two calls if the module is unloaded in the meantime.
        state = initstate_file.read_text(encoding="ascii")
    except (FileNotFoundError, NotADirectoryError):
        return False
    return state.strip() == "live"
|
|
|
|
|
|
def _init_amdsmi_or_exit(init_flag, not_loaded_message):
    """Call ``amdsmi_init(init_flag)``; exit with -1 if the library reports no driver.

    Args:
        init_flag: AmdSmiInitFlags member passed straight to ``amdsmi_init``.
        not_loaded_message: Text logged at ERROR level before exiting when the
            library reports AMDSMI_STATUS_NOT_INIT or
            AMDSMI_STATUS_DRIVER_NOT_LOADED.

    Raises:
        AmdSmiLibraryException, AmdSmiParameterException: re-raised unchanged
            when the failure is not one of the two "not loaded" statuses.
        SystemExit: with code -1 for the two "not loaded" statuses.
    """
    try:
        amdsmi_interface.amdsmi_init(init_flag)
    except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
        not_loaded_codes = (
            amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
            amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED,
        )
        # getattr: guards against a caught exception type that does not expose
        # err_code (assumption -- confirm against amdsmi_exception); such an
        # error is re-raised as-is instead of being masked by AttributeError.
        if getattr(e, "err_code", None) in not_loaded_codes:
            logging.error(not_loaded_message)
            sys.exit(-1)
        raise


def init_amdsmi(flag=amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS):
    """ Initializes AMDSMI

    The init flag is selected from the drivers whose initstate is live:
    INIT_AMD_APUS when both amdgpu and amd_hsmp are live, INIT_AMD_GPUS when
    only amdgpu is live, INIT_AMD_CPUS when only amd_hsmp is live. When
    neither is live nothing is initialized and the function returns quietly.

    Args:
        flag: Currently unused; kept so existing callers that pass it keep
            working.

    Raises:
        err: AmdSmiLibraryException if not successful
        SystemExit: with code -1 when the library reports that the driver is
            not loaded / not initialized.
    """
    # Query each driver once; every check re-reads sysfs.
    gpu_live = check_amdgpu_driver()
    cpu_live = check_amdhsmp_driver()

    if gpu_live and cpu_live:
        # Both drivers are up: init AMD APUS
        _init_amdsmi_or_exit(
            amdsmi_interface.AmdSmiInitFlags.INIT_AMD_APUS,
            "Drivers not loaded (amdgpu and hsmp drivers not found in modules)",
        )
    elif gpu_live:
        # Only init AMD GPUs
        _init_amdsmi_or_exit(
            amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS,
            "Driver not loaded (amdgpu not found in modules)",
        )
    elif cpu_live:
        # Only init AMD CPUs
        _init_amdsmi_or_exit(
            amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS,
            "Driver not loaded (hsmp not found in modules)",
        )
    else:
        # Neither driver is live. This stays non-fatal, but do not claim success.
        logging.debug("AMDSMI not initialized (neither amdgpu nor amd_hsmp initstate is live)")
        return

    logging.debug("AMDSMI initialized successfully")
|
|
|
|
|
|
def shut_down_amdsmi():
    """Shut down the AMDSMI library instance.

    Calls ``amdsmi_interface.amdsmi_shut_down()``. On failure an error is
    logged and the exception is propagated to the caller.

    Raises:
        AmdSmiLibraryException: if the library reports a shutdown failure.
    """
    try:
        amdsmi_interface.amdsmi_shut_down()
    except amdsmi_exception.AmdSmiLibraryException as shutdown_error:
        # Tell the user why the exit was not clean before propagating
        logging.error("Unable to cleanly shut down amd-smi-lib")
        raise shutdown_error
|
|
|
|
|
|
def signal_handler(sig, frame):
    """Log the received signal at debug level and exit with status 0.

    Args:
        sig: Signal number delivered to the process.
        frame: Stack frame supplied by the signal machinery; not used.

    Raises:
        SystemExit: always, with exit code 0.
    """
    message = f"Handling signal: {sig}"
    logging.debug(message)
    sys.exit(0)
|
|
|
|
|
|
# One-time bootstrap on import: initialize the library, install the signal
# handlers, and register library shutdown to run at interpreter exit.
if not AMDSMI_INITIALIZED:
    init_amdsmi()
    AMDSMI_INITIALIZED = True
    # SIGINT and SIGTERM are both routed through signal_handler
    for _signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(_signum, signal_handler)
    atexit.register(shut_down_amdsmi)
|