Files
rocm-systems/amdsmi_cli/amdsmi_init.py
T
Naveen Krishna Chatradhi 19030e5b72 amdsmi_cli: Add support for CPU specific API in amdsmi_cli tool
- Add support for CPU only if only the hsmp driver is present.
  - Add support for both the amdgpu and amdcpu's if both the amdgpu driver and cpu's are present.
  - Add support for socket power metrics
  - Add support for hsmp proto type version, prochot status, read current fclkmclk freq
    and current cclk freq limit, c0 residency, lclk dpm level range, socket frequency range
  - Add CPU socket current frequency limit.
  - Update tool for API's IO bandwidth, XGMI bandwidth,
    power telemetry rails, APB enable and APB disable API's
  - Add support set_pow_limit, set_xgmi_link_width, set_lclk_dpm_level, core_boost_limit,
    curr_active_freq_core_limit, set_soc_boost_limit and set_core_boost_limit.
  - Add support for the following cpu related API's in tool
    core_energy, socket energy, set power efficiency mode, ddr bandwidth,
    cpu temperature, dimm temperature range rate, dimm power consumption
    and dimm thermal temperature.
  - Add support for set_gmi3_link_width, set_pcie_lnk_rate, set_df_pstate_range

Change-Id: I5a35d1cceeb7df0bc8b7116df7c27bb7f376e839
2023-12-18 06:31:49 -05:00

142 строки
5.5 KiB
Python

#!/usr/bin/env python3
#
# Copyright (C) 2023 Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
### Handle safe initialization for amdsmi
import atexit
import logging
import signal
import sys
from pathlib import Path
sys.path.append(f"{Path(__file__).resolve().parent}/../../share/amd_smi")
# If the python library is installed, it will overwrite the path above
from amdsmi import amdsmi_interface
from amdsmi import amdsmi_exception
# Using basic python logging for user errors and development
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.ERROR) # User level logging
# This traceback limit only affects this file; once the code hits the CLI portion it gets reset to the user's preference
sys.tracebacklimit = -1 # Disable traceback when raising errors
# On initial import set initialized variable
AMDSMI_INITIALIZED = False
AMD_VENDOR_ID = 4098
def check_amdgpu_driver():
    """Report whether the amdgpu kernel module is initialized.

    Returns:
        bool: True when /sys/module/amdgpu/initstate exists and its
        whitespace-stripped content equals "live"; False otherwise.
    """
    initstate = Path("/sys/module/amdgpu/initstate")
    # Guard clause: a missing initstate file means the module is not loaded
    if not initstate.exists():
        return False
    return initstate.read_text(encoding="ascii").strip() == "live"
def check_amdhsmp_driver():
    """Report whether the amd_hsmp kernel module is initialized.

    Returns:
        bool: True when /sys/module/amd_hsmp/initstate exists and its
        whitespace-stripped content equals "live"; False otherwise.
    """
    hsmp_initstate = Path("/sys/module/amd_hsmp/initstate")
    # Short-circuit: the file is only read when it exists
    return (hsmp_initstate.exists()
            and hsmp_initstate.read_text(encoding="ascii").strip() == "live")
def _amdsmi_init_or_exit(init_flag, not_loaded_msg):
    """Call amdsmi_init(init_flag); log not_loaded_msg and exit(-1) on a driver-missing status."""
    try:
        amdsmi_interface.amdsmi_init(init_flag)
    except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
        driver_missing_codes = (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                                amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED)
        # getattr: it is not visible here that every caught exception type
        # carries err_code; without it we must re-raise the original error
        # rather than mask it with an AttributeError.
        if getattr(e, "err_code", None) in driver_missing_codes:
            logging.error(not_loaded_msg)
            sys.exit(-1)
        raise


def init_amdsmi(flag=amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS):
    """Initialize AMDSMI for whichever AMD kernel drivers are live.

    The init flag is selected from the results of check_amdgpu_driver() and
    check_amdhsmp_driver():
        both live      -> INIT_AMD_APUS
        only amdgpu    -> INIT_AMD_GPUS
        only amd_hsmp  -> INIT_AMD_CPUS
        neither        -> nothing is initialized (no error is raised)

    Args:
        flag: Kept for backward compatibility. It is currently NOT used; the
            init flag is always derived from driver detection.

    Raises:
        AmdSmiLibraryException, AmdSmiParameterException: re-raised when
            amdsmi_init fails with a status other than NOT_INIT or
            DRIVER_NOT_LOADED.
        SystemExit: with code -1 when amdsmi_init reports NOT_INIT or
            DRIVER_NOT_LOADED.
    """
    init_flags = amdsmi_interface.AmdSmiInitFlags
    # Query each driver exactly once so every branch sees the same state
    gpu_live = check_amdgpu_driver()
    cpu_live = check_amdhsmp_driver()

    if gpu_live and cpu_live:
        _amdsmi_init_or_exit(init_flags.INIT_AMD_APUS,
                             "Drivers not loaded (amdgpu and hsmp drivers not found in modules)")
    elif gpu_live:
        _amdsmi_init_or_exit(init_flags.INIT_AMD_GPUS,
                             "Driver not loaded (amdgpu not found in modules)")
    elif cpu_live:
        _amdsmi_init_or_exit(init_flags.INIT_AMD_CPUS,
                             "Driver not loaded (hsmp not found in modules)")
    else:
        # Deliberately not fatal (matches previous behavior), but do not
        # claim success when nothing was initialized.
        logging.debug("Neither amdgpu nor amd_hsmp driver is live; AMDSMI was not initialized")
        return

    logging.debug("AMDSMI initialized successfully")
def shut_down_amdsmi():
    """Shut down the AMDSMI library instance.

    Raises:
        AmdSmiLibraryException: re-raised, after logging an error, when
            amdsmi_shut_down() fails.
    """
    try:
        amdsmi_interface.amdsmi_shut_down()
    except amdsmi_exception.AmdSmiLibraryException:
        # Tell the user shutdown was not clean, then propagate the failure
        logging.error("Unable to cleanly shut down amd-smi-lib")
        raise
def signal_handler(sig, frame):
    """Log the received signal at debug level, then exit with status 0.

    Args:
        sig: Signal number delivered to the handler.
        frame: Stack frame delivered to the handler; unused.

    Raises:
        SystemExit: always, with exit code 0.
    """
    message = f"Handling signal: {sig}"
    logging.debug(message)
    raise SystemExit(0)
# Import-time setup: runs while AMDSMI_INITIALIZED is still False.
if not AMDSMI_INITIALIZED:
    # Initialize the library first; init_amdsmi() may exit the process on failure
    init_amdsmi()
    AMDSMI_INITIALIZED = True
    # Route SIGINT and SIGTERM to signal_handler, which exits with status 0
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    # Register library shutdown to run at interpreter exit
    atexit.register(shut_down_amdsmi)