Add 'projects/amdsmi/' from commit 'b4b3539631460b986dddc86a2303cef11cd38816'
git-subtree-dir: projects/amdsmi git-subtree-mainline: 0633d8d8ce git-subtree-split: b4b3539631
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- LICENSE
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- docs
|
||||
- '.*.y*ml'
|
||||
- '*.md'
|
||||
- LICENSE
|
||||
drafts: false
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/amdsmi.yml@pipelines_repo
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
ColumnLimit: 100
|
||||
@@ -0,0 +1,33 @@
|
||||
Checks:
|
||||
bugprone*,
|
||||
clang-analyzer*,
|
||||
google*,
|
||||
misc*,
|
||||
modernize*,
|
||||
-abseil*,
|
||||
-bugprone-easily-swappable-parameters,
|
||||
-bugprone-reserved-identifier,
|
||||
-clang-analyzer-security.insecureAPI.strcpy,
|
||||
-clang-diagnostic-sign-conversion,
|
||||
-clang-diagnostic-unused-parameter,
|
||||
-cppcoreguidelines*,
|
||||
-cppcoreguidelines-pro*,
|
||||
-google-readability*,
|
||||
-google-runtime-int,
|
||||
-misc-const-correctness,
|
||||
-misc-include-cleaner,
|
||||
-misc-non-copyable-objects,
|
||||
-misc-unused-parameters,
|
||||
-misc-use-anonymous-namespace,
|
||||
-misc-use-internal-linkage,
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-macro-to-enum,
|
||||
-modernize-redundant-void-arg,
|
||||
-modernize-use-auto,
|
||||
-modernize-use-nodiscard,
|
||||
-modernize-use-noexcept,
|
||||
-modernize-use-nullptr,
|
||||
-modernize-use-trailing-return-type,
|
||||
-modernize-use-using,
|
||||
-performance*,
|
||||
-readability*,
|
||||
@@ -0,0 +1,42 @@
|
||||
CompileFlags:
|
||||
Remove: -W*
|
||||
Add: [-Wall, -Wno-c++20-designator, -pedantic, -Wno-sign-conversion]
|
||||
Compiler: clang++
|
||||
|
||||
# list here: https://clang.llvm.org/extra/clang-tidy/checks/list.html
|
||||
Diagnostics:
|
||||
UnusedIncludes: Strict
|
||||
# rules below are copied into .clang-tidy using ./.update-clang-tidy.sh
|
||||
# please keep the rules sorted alphabetically
|
||||
ClangTidy:
|
||||
Add: [
|
||||
bugprone*,
|
||||
clang-analyzer*,
|
||||
google*,
|
||||
misc*,
|
||||
modernize*,
|
||||
]
|
||||
Remove: [
|
||||
abseil*,
|
||||
bugprone-easily-swappable-parameters,
|
||||
bugprone-reserved-identifier,
|
||||
cppcoreguidelines*,
|
||||
cppcoreguidelines-pro*,
|
||||
google-readability*,
|
||||
google-runtime-int,
|
||||
misc-const-correctness,
|
||||
misc-include-cleaner,
|
||||
misc-non-copyable-objects,
|
||||
misc-unused-parameters,
|
||||
misc-use-anonymous-namespace,
|
||||
modernize-avoid-c-arrays,
|
||||
modernize-redundant-void-arg,
|
||||
modernize-use-auto,
|
||||
modernize-use-nodiscard,
|
||||
modernize-use-noexcept,
|
||||
modernize-use-nullptr,
|
||||
modernize-use-trailing-return-type,
|
||||
modernize-use-using,
|
||||
performance*,
|
||||
readability*,
|
||||
]
|
||||
@@ -0,0 +1,253 @@
|
||||
# ----------------------------------
|
||||
# Options affecting listfile parsing
|
||||
# ----------------------------------
|
||||
with section("parse"):
|
||||
|
||||
# Specify structure for custom cmake functions
|
||||
additional_commands = {
|
||||
'parse_version': {
|
||||
'kwargs': {
|
||||
'VERSION_STRING': '*'
|
||||
}
|
||||
},
|
||||
'get_version_from_tag': {
|
||||
'kwargs': {
|
||||
'DEFAULT_VERSION_STRING': '*',
|
||||
'VERSION_PREFIX': '*',
|
||||
'GIT': '*'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Override configurations per-command where available
|
||||
override_spec = {}
|
||||
|
||||
# Specify variable tags.
|
||||
vartags = []
|
||||
|
||||
# Specify property tags.
|
||||
proptags = []
|
||||
|
||||
# -----------------------------
|
||||
# Options affecting formatting.
|
||||
# -----------------------------
|
||||
with section("format"):
|
||||
|
||||
# Disable formatting entirely, making cmake-format a no-op
|
||||
#disable = True
|
||||
|
||||
# How wide to allow formatted cmake files
|
||||
line_width = 120
|
||||
|
||||
# How many spaces to tab for indent
|
||||
tab_size = 4
|
||||
|
||||
# If true, lines are indented using tab characters (utf-8 0x09) instead of
|
||||
# <tab_size> space characters (utf-8 0x20). In cases where the layout would
|
||||
# require a fractional tab character, the behavior of the fractional
|
||||
# indentation is governed by <fractional_tab_policy>
|
||||
use_tabchars = False
|
||||
|
||||
# If <use_tabchars> is True, then the value of this variable indicates how
|
||||
# fractional indentions are handled during whitespace replacement. If set to
|
||||
# 'use-space', fractional indentation is left as spaces (utf-8 0x20). If set
|
||||
# to `round-up` fractional indentation is replaced with a single tab character
|
||||
# (utf-8 0x09) effectively shifting the column to the next tabstop
|
||||
fractional_tab_policy = 'use-space'
|
||||
|
||||
# If an argument group contains more than this many sub-groups (parg or kwarg
|
||||
# groups) then force it to a vertical layout.
|
||||
max_subgroups_hwrap = 3
|
||||
|
||||
# If a positional argument group contains more than this many arguments, then
|
||||
# force it to a vertical layout.
|
||||
max_pargs_hwrap = 6
|
||||
|
||||
# If a cmdline positional group consumes more than this many lines without
|
||||
# nesting, then invalidate the layout (and nest)
|
||||
max_rows_cmdline = 2
|
||||
|
||||
# If true, separate flow control names from their parentheses with a space
|
||||
separate_ctrl_name_with_space = False
|
||||
|
||||
# If true, separate function names from parentheses with a space
|
||||
separate_fn_name_with_space = False
|
||||
|
||||
# If a statement is wrapped to more than one line, then dangle the closing
|
||||
# parenthesis on its own line.
|
||||
dangle_parens = False
|
||||
|
||||
# If the trailing parenthesis must be 'dangled' on its own line, then align it
|
||||
# to this reference: `prefix`: the start of the statement, `prefix-indent`:
|
||||
# the start of the statement, plus one indentation level, `child`: align to
|
||||
# the column of the arguments
|
||||
dangle_align = 'prefix'
|
||||
|
||||
# If the statement spelling length (including space and parenthesis) is
|
||||
# smaller than this amount, then force reject nested layouts.
|
||||
min_prefix_chars = 4
|
||||
|
||||
# If the statement spelling length (including space and parenthesis) is larger
|
||||
# than the tab width by more than this amount, then force reject un-nested
|
||||
# layouts.
|
||||
max_prefix_chars = 10
|
||||
|
||||
# If a candidate layout is wrapped horizontally but it exceeds this many
|
||||
# lines, then reject the layout.
|
||||
max_lines_hwrap = 2
|
||||
|
||||
# What style line endings to use in the output.
|
||||
line_ending = 'unix'
|
||||
|
||||
# Format command names consistently as 'lower' or 'upper' case
|
||||
command_case = 'canonical'
|
||||
|
||||
# Format keywords consistently as 'lower' or 'upper' case
|
||||
keyword_case = 'unchanged'
|
||||
|
||||
# A list of command names which should always be wrapped
|
||||
always_wrap = ['install']
|
||||
|
||||
# If true, the argument lists which are known to be sortable will be sorted
|
||||
# lexicographically
|
||||
enable_sort = True
|
||||
|
||||
# If true, the parsers may infer whether or not an argument list is sortable
|
||||
# (without annotation).
|
||||
autosort = False
|
||||
|
||||
# By default, if cmake-format cannot successfully fit everything into the
|
||||
# desired linewidth it will apply the last, most aggressive attempt that it
|
||||
# made. If this flag is True, however, cmake-format will print error, exit
|
||||
# with non-zero status code, and write-out nothing
|
||||
require_valid_layout = False
|
||||
|
||||
# A dictionary mapping layout nodes to a list of wrap decisions. See the
|
||||
# documentation for more information.
|
||||
layout_passes = {}
|
||||
|
||||
# ------------------------------------------------
|
||||
# Options affecting comment reflow and formatting.
|
||||
# ------------------------------------------------
|
||||
with section("markup"):
|
||||
|
||||
# What character to use for bulleted lists
|
||||
bullet_char = '*'
|
||||
|
||||
# What character to use as punctuation after numerals in an enumerated list
|
||||
enum_char = '.'
|
||||
|
||||
# If comment markup is enabled, don't reflow the first comment block in each
|
||||
# listfile. Use this to preserve formatting of your copyright/license
|
||||
# statements.
|
||||
first_comment_is_literal = False
|
||||
|
||||
# If comment markup is enabled, don't reflow any comment block which matches
|
||||
# this (regex) pattern. Default is `None` (disabled).
|
||||
literal_comment_pattern = None
|
||||
|
||||
# Regular expression to match preformat fences in comments default=
|
||||
# ``r'^\s*([`~]{3}[`~]*)(.*)$'``
|
||||
fence_pattern = '^\\s*([`~]{3}[`~]*)(.*)$'
|
||||
|
||||
# Regular expression to match rulers in comments default=
|
||||
# ``r'^\s*[^\w\s]{3}.*[^\w\s]{3}$'``
|
||||
ruler_pattern = '^\\s*[^\\w\\s]{3}.*[^\\w\\s]{3}$'
|
||||
|
||||
# If a comment line starts with this pattern then it is explicitly a
|
||||
# trailing comment for the preceding argument. Default is '#<'
|
||||
explicit_trailing_pattern = '#<'
|
||||
|
||||
# If a comment line starts with at least this many consecutive hash
|
||||
# characters, then don't lstrip() them off. This allows for lazy hash rulers
|
||||
# where the first hash char is not separated by space
|
||||
hashruler_min_length = 10
|
||||
|
||||
# If true, then insert a space between the first hash char and remaining hash
|
||||
# chars in a hash ruler, and normalize its length to fill the column
|
||||
canonicalize_hashrulers = True
|
||||
|
||||
# enable comment markup parsing and reflow
|
||||
enable_markup = False
|
||||
|
||||
# ----------------------------
|
||||
# Options affecting the linter
|
||||
# ----------------------------
|
||||
with section("lint"):
|
||||
|
||||
# a list of lint codes to disable
|
||||
disabled_codes = ['C0307', 'C0301', 'C0305']
|
||||
|
||||
# regular expression pattern describing valid function names
|
||||
function_pattern = '[0-9a-z_]+'
|
||||
|
||||
# regular expression pattern describing valid macro names
|
||||
macro_pattern = '[0-9A-Z_]+'
|
||||
|
||||
# regular expression pattern describing valid names for variables with global
|
||||
# (cache) scope
|
||||
global_var_pattern = '[A-Z][0-9A-Z_]+'
|
||||
|
||||
# regular expression pattern describing valid names for variables with global
|
||||
# scope (but internal semantic)
|
||||
internal_var_pattern = '_[A-Z][0-9A-Z_]+'
|
||||
|
||||
# regular expression pattern describing valid names for variables with local
|
||||
# scope
|
||||
local_var_pattern = '[a-z][a-z0-9_]+'
|
||||
|
||||
# regular expression pattern describing valid names for private directory
|
||||
# variables
|
||||
private_var_pattern = '_[0-9a-z_]+'
|
||||
|
||||
# regular expression pattern describing valid names for public directory
|
||||
# variables
|
||||
public_var_pattern = '[A-Z][0-9A-Z_]+'
|
||||
|
||||
# regular expression pattern describing valid names for function/macro
|
||||
# arguments and loop variables.
|
||||
argument_var_pattern = '[a-z][a-z0-9_]+'
|
||||
|
||||
# regular expression pattern describing valid names for keywords used in
|
||||
# functions or macros
|
||||
keyword_pattern = '[A-Z][0-9A-Z_]+'
|
||||
|
||||
# In the heuristic for C0201, how many conditionals to match within a loop
|
||||
# before considering the loop a parser.
|
||||
max_conditionals_custom_parser = 2
|
||||
|
||||
# Require at least this many newlines between statements
|
||||
min_statement_spacing = 1
|
||||
|
||||
# Require no more than this many newlines between statements
|
||||
max_statement_spacing = 2
|
||||
max_returns = 6
|
||||
max_branches = 12
|
||||
max_arguments = 5
|
||||
max_localvars = 15
|
||||
max_statements = 50
|
||||
|
||||
# -------------------------------
|
||||
# Options affecting file encoding
|
||||
# -------------------------------
|
||||
with section("encode"):
|
||||
|
||||
# If true, emit the unicode byte-order mark (BOM) at the start of the file
|
||||
emit_byteorder_mark = False
|
||||
|
||||
# Specify the encoding of the input file. Defaults to utf-8
|
||||
input_encoding = 'utf-8'
|
||||
|
||||
# Specify the encoding of the output file. Defaults to utf-8. Note that cmake
|
||||
# only claims to support utf-8 so be careful when using anything else
|
||||
output_encoding = 'utf-8'
|
||||
|
||||
# -------------------------------------
|
||||
# Miscellaneous configuration options.
|
||||
# -------------------------------------
|
||||
with section("misc"):
|
||||
|
||||
# A dictionary containing any per-command configuration overrides. Currently
|
||||
# only `command_case` is supported.
|
||||
per_command = {}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
# EditorConfig standardizes spacing in all editors: https://EditorConfig.org
|
||||
# Please get a plugin for your editor to match the formatting
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
[*.py]
|
||||
indent_style = space
|
||||
|
||||
# Matches multiple files with brace expansion notation
|
||||
# Set default charset
|
||||
[*.{c,cc,cpp,h,hh,hpp}]
|
||||
charset = utf-8
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
max_line_length = 100
|
||||
Vendored
+6
@@ -0,0 +1,6 @@
|
||||
* @maisarif_amdeng @shuzhliu_amdeng @dgalants_amdeng @charpoag_amdeng @daolivei_amdeng @marifamd @bill-shuzhou-liu @dmitrii-galantsev @charis-poag-amd @oliveiradan @gabrpham_amdeng
|
||||
|
||||
docs/* @ROCm/rocm-documentation
|
||||
*.md @ROCm/rocm-documentation
|
||||
*.rst @ROCm/rocm-documentation
|
||||
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
# Contributing to AMD SMI #
|
||||
|
||||
We welcome contributions to AMD SMI.
|
||||
Please follow these guidelines to help ensure your contributions will be successfully accepted.
|
||||
|
||||
## Issue Discussion ##
|
||||
|
||||
Please use the GitHub Issues tab to notify us of issues.
|
||||
|
||||
* Use your best judgement for issue creation. If your issue is already listed, upvote the issue and
|
||||
comment or post to provide additional details, such as how you reproduced this issue.
|
||||
* If you're not sure if your issue is the same, err on the side of caution and file your issue.
|
||||
You can add a comment to include the issue number (and link) for the similar issue. If we evaluate
|
||||
your issue as being the same as the existing issue, we'll close the duplicate.
|
||||
* If your issue doesn't exist, use the issue template to file a new issue.
|
||||
* When filing an issue, be sure to provide as much information as possible, including script output so
|
||||
we can collect information about your configuration. This helps reduce the time required to
|
||||
reproduce your issue.
|
||||
* Check your issue regularly, as we may require additional information to successfully reproduce the
|
||||
issue.
|
||||
* You may also open an issue to ask questions to the maintainers about whether a proposed change
|
||||
meets the acceptance criteria, or to discuss an idea pertaining to the library.
|
||||
|
||||
## Acceptance Criteria ##
|
||||
|
||||
The goal of the AMD SMI project is to provide a simple CLI interface and a library
|
||||
for interacting with AMD GPUs.
|
||||
|
||||
## Coding Style ##
|
||||
|
||||
Please refer to `.clang-format`. It is suggested you use the `pre-commit` tool.
|
||||
It mostly follows Google C++ formatting with a 100-character line limit.
|
||||
|
||||
## Pull Request Guidelines ##
|
||||
|
||||
When you create a pull request, you should target the default branch. Our
|
||||
current default branch is the **amd-staging** branch, which serves as our
|
||||
integration branch.
|
||||
|
||||
### Deliverables ###
|
||||
|
||||
For each new file in the repository,
|
||||
please include the licensing header:
|
||||
|
||||
/*
|
||||
* =============================================================================
|
||||
* Copyright (c) 2019-2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
### Process ###
|
||||
|
||||
* Reviewers are listed in the CODEOWNERS file
|
||||
* Code format guidelines
|
||||
|
||||
AMD SMI uses the clang-format tool for formatting code in source files.
|
||||
The formatting style is captured in .clang-format which is located at
|
||||
the root of AMD SMI. There are two ways to apply it:
|
||||
|
||||
1. Using pre-commit and docker - `pre-commit run`
|
||||
1. Using only clang-format - `clang-format -i <path-to-the-source-file>`
|
||||
|
||||
## References ##
|
||||
|
||||
1. [pre-commit](https://github.com/pre-commit/pre-commit)
|
||||
1. [clang-format](https://clang.llvm.org/docs/ClangFormat.html)
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/docs/sphinx" # Location of package manifests
|
||||
open-pull-requests-limit: 10
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
target-branch: "amd-staging"
|
||||
labels:
|
||||
- "documentation"
|
||||
- "dependencies"
|
||||
reviewers:
|
||||
- "petepark_amdeng"
|
||||
Vendored
+5
@@ -0,0 +1,5 @@
|
||||
disabled: false
|
||||
scmId: gh-emu-rocm
|
||||
branchesToScan:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
@@ -0,0 +1,314 @@
|
||||
name: ABI Compliance Check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- amd-staging
|
||||
- release/rocm-rel-*
|
||||
paths:
|
||||
- 'include/amd_smi/amdsmi.h'
|
||||
push:
|
||||
branches:
|
||||
- amd-staging
|
||||
- release/rocm-rel-*
|
||||
paths:
|
||||
- 'include/amd_smi/amdsmi.h'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
major_abi_check:
|
||||
name: Major ABI Compliance Check
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- name: Setup Environment
|
||||
run: |
|
||||
sudo rm -rf $GITHUB_WORKSPACE/* || true
|
||||
sudo rm -rf $GITHUB_WORKSPACE/.[!.]* || true
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y -qq perl build-essential git universal-ctags
|
||||
git clone https://github.com/lvc/abi-compliance-checker.git
|
||||
cd abi-compliance-checker
|
||||
sudo make install
|
||||
abi-compliance-checker --version
|
||||
|
||||
- name: Checkout current code (new version)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
- name: Fetch base branch for PR
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "Fetching base branch: ${{ github.base_ref }}"
|
||||
git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
|
||||
git branch -a
|
||||
|
||||
- name: Prepare amdsmi.h files for comparison
|
||||
id: prepare_files
|
||||
run: |
|
||||
echo "Preparing amdsmi.h files..."
|
||||
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
|
||||
if [ -f include/amd_smi/amdsmi.h ]; then
|
||||
cp include/amd_smi/amdsmi.h amdsmi_new.h
|
||||
echo "Copied current amdsmi.h to amdsmi_new.h"
|
||||
else
|
||||
echo "::error::New amdsmi.h (include/amd_smi/amdsmi.h) not found in current checkout."
|
||||
touch amdsmi_new.h
|
||||
exit 0
|
||||
fi
|
||||
|
||||
OLD_VERSION_REF=""
|
||||
V1_NAME_SUFFIX=""
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
OLD_VERSION_REF="${{ github.base_ref }}"
|
||||
V1_NAME_SUFFIX="base_${{ github.base_ref }}"
|
||||
echo "Event is Pull Request. Old version source is base branch: ${OLD_VERSION_REF}"
|
||||
elif [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
if [[ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]]; then
|
||||
OLD_VERSION_REF="${{ github.event.before }}"
|
||||
V1_NAME_SUFFIX="before_$(echo ${{ github.event.before }} | cut -c1-7)"
|
||||
echo "Event is Push. Old version source is commit before push: ${OLD_VERSION_REF}"
|
||||
else
|
||||
echo "Push event is for a new branch or forced push. Cannot determine 'old' version."
|
||||
touch amdsmi_old.h
|
||||
echo "Created dummy amdsmi_old.h. Assuming no ABI breakage as no baseline."
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
echo "skip_check=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
else
|
||||
echo "::warning::Unsupported event type: ${{ github.event_name }}. Cannot determine old version."
|
||||
touch amdsmi_old.h
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
echo "skip_check=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Fetching amdsmi.h from ref: $OLD_VERSION_REF (as amdsmi_old.h)"
|
||||
git show $OLD_VERSION_REF:include/amd_smi/amdsmi.h > amdsmi_old.h 2>/dev/null
|
||||
if [ $? -ne 0 ] || [ ! -s amdsmi_old.h ]; then
|
||||
echo "::warning::Failed to fetch 'include/amd_smi/amdsmi.h' from ref '$OLD_VERSION_REF' or file is empty/missing."
|
||||
echo "Proceeding with an empty amdsmi_old.h. This may result in all symbols reported as 'added'."
|
||||
echo -n "" > amdsmi_old.h
|
||||
if [ ! -s amdsmi_new.h ]; then
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
fi
|
||||
else
|
||||
echo "Successfully fetched amdsmi.h from $OLD_VERSION_REF to amdsmi_old.h"
|
||||
fi
|
||||
echo "v1_name_suffix=${V1_NAME_SUFFIX}" >> $GITHUB_OUTPUT
|
||||
echo "skip_check=false" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Run Major ABI Compliance Check
|
||||
if: steps.prepare_files.outputs.skip_check == 'false'
|
||||
run: |
|
||||
V1_NAME_SUFFIX_CLEAN=$(echo "${{ steps.prepare_files.outputs.v1_name_suffix }}" | tr '/' '-')
|
||||
V2_NAME_CLEAN=$(echo "${{ github.ref_name || github.head_ref }}" | tr '/' '-')
|
||||
|
||||
echo "Comparing $V1_NAME_SUFFIX_CLEAN (old) with $V2_NAME_CLEAN (new) for Major ABI Check"
|
||||
abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 "$V1_NAME_SUFFIX_CLEAN" -v2 "$V2_NAME_CLEAN" -report-path major-abi-report.html && echo "abi_exit_code=0" > $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
continue-on-error: true
|
||||
|
||||
- name: Display ABI Check Logs (Major)
|
||||
if: always() && steps.prepare_files.outputs.skip_check == 'false'
|
||||
run: |
|
||||
echo "Displaying Major ABI compliance check logs (if any)"
|
||||
find logs -type f -name "*.txt" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No .txt logs found in logs/ directory."
|
||||
|
||||
- name: Label PR on Major ABI Breakage
|
||||
if: always() && github.event_name == 'pull_request'
|
||||
run: |
|
||||
source $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
if [ "$abi_exit_code" -ne 0 ]; then
|
||||
echo "Major ABI check failed, adding 'MAJOR ABI BREAKAGE' label to PR #${{ github.event.pull_request.number }}"
|
||||
gh pr edit ${{ github.event.pull_request.number }} --add-label "MAJOR ABI BREAKAGE"
|
||||
fi
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload Major ABI Report
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: major-abi-report
|
||||
path: major-abi-report.html
|
||||
if-no-files-found: ignore
|
||||
|
||||
- name: Report Major ABI Check Results
|
||||
if: always()
|
||||
run: |
|
||||
echo "Checking Major ABI check exit code..."
|
||||
source $GITHUB_WORKSPACE/major_abi_status.txt
|
||||
echo "Major ABI check exit code: $abi_exit_code"
|
||||
if [ "$abi_exit_code" -ne 0 ]; then
|
||||
echo "::warning::⚠️ MAJOR ABI BREAKAGE FOUND ⚠️ CHECK \"Run Major ABI Compliance Check\" LOGS OR THE major-abi-report ARTIFACT FOR DETAILS."
|
||||
else
|
||||
echo "✅ Major ABI check succeeded."
|
||||
fi
|
||||
|
||||
minor_abi_check:
|
||||
name: Minor ABI Compliance Check
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- name: Setup Environment
|
||||
run: |
|
||||
sudo rm -rf $GITHUB_WORKSPACE/* || true
|
||||
sudo rm -rf $GITHUB_WORKSPACE/.[!.]* || true
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y -qq perl build-essential git universal-ctags
|
||||
git clone https://github.com/lvc/abi-compliance-checker.git
|
||||
cd abi-compliance-checker
|
||||
sudo make install
|
||||
abi-compliance-checker --version
|
||||
|
||||
- name: Checkout current code (new version)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
|
||||
- name: Fetch base branch for PR
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "Fetching base branch: ${{ github.base_ref }}"
|
||||
git fetch origin ${{ github.base_ref }}:${{ github.base_ref }}
|
||||
git branch -a
|
||||
|
||||
- name: Prepare amdsmi.h files for comparison
|
||||
id: prepare_files_minor
|
||||
run: |
|
||||
echo "Preparing amdsmi.h files for Minor check..."
|
||||
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
|
||||
if [ -f include/amd_smi/amdsmi.h ]; then
|
||||
cp include/amd_smi/amdsmi.h amdsmi_new.h
|
||||
echo "Copied current amdsmi.h to amdsmi_new.h for Minor check"
|
||||
else
|
||||
echo "::error::New amdsmi.h (include/amd_smi/amdsmi.h) not found in current checkout for Minor check."
|
||||
touch amdsmi_new.h
|
||||
exit 0
|
||||
fi
|
||||
|
||||
OLD_VERSION_REF_MINOR=""
|
||||
V1_NAME_SUFFIX_MINOR=""
|
||||
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
|
||||
OLD_VERSION_REF_MINOR="${{ github.base_ref }}"
|
||||
V1_NAME_SUFFIX_MINOR="base_${{ github.base_ref }}"
|
||||
elif [[ "${{ github.event_name }}" == "push" ]]; then
|
||||
if [[ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]]; then
|
||||
OLD_VERSION_REF_MINOR="${{ github.event.before }}"
|
||||
V1_NAME_SUFFIX_MINOR="before_$(echo ${{ github.event.before }} | cut -c1-7)"
|
||||
else
|
||||
echo "Push event is for a new branch (Minor check). Assuming no ABI changes as no baseline."
|
||||
touch amdsmi_old.h
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
echo "skip_check_minor=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
else
|
||||
echo "::warning::Unsupported event type for Minor ABI check: ${{ github.event_name }}."
|
||||
touch amdsmi_old.h
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
echo "skip_check_minor=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Fetching amdsmi.h from ref: $OLD_VERSION_REF_MINOR (as amdsmi_old.h) for Minor check"
|
||||
git show $OLD_VERSION_REF_MINOR:include/amd_smi/amdsmi.h > amdsmi_old.h 2>/dev/null
|
||||
if [ $? -ne 0 ] || [ ! -s amdsmi_old.h ]; then
|
||||
echo "::warning::Failed to fetch 'include/amd_smi/amdsmi.h' from ref '$OLD_VERSION_REF_MINOR' or file is empty/missing for Minor check."
|
||||
echo "Proceeding with an empty amdsmi_old.h for Minor check."
|
||||
echo -n "" > amdsmi_old.h
|
||||
if [ ! -s amdsmi_new.h ]; then
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
fi
|
||||
else
|
||||
echo "Successfully fetched amdsmi.h from $OLD_VERSION_REF_MINOR to amdsmi_old.h for Minor check"
|
||||
fi
|
||||
echo "v1_name_suffix_minor=${V1_NAME_SUFFIX_MINOR}" >> $GITHUB_OUTPUT
|
||||
echo "skip_check_minor=false" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Run Minor ABI Compliance Check (Strict)
|
||||
if: steps.prepare_files_minor.outputs.skip_check_minor == 'false'
|
||||
run: |
|
||||
V1_NAME_SUFFIX_CLEAN=$(echo "${{ steps.prepare_files_minor.outputs.v1_name_suffix_minor }}" | tr '/' '-')
|
||||
V2_NAME_CLEAN=$(echo "${{ github.ref_name || github.head_ref }}" | tr '/' '-')
|
||||
COMPARE_MSG="$V1_NAME_SUFFIX_CLEAN vs $V2_NAME_CLEAN"
|
||||
|
||||
echo "Comparing $COMPARE_MSG for Minor ABI Check (Strict)"
|
||||
|
||||
abi-compliance-checker -lib amdsmi -old amdsmi_old.h -new amdsmi_new.h -v1 "$V1_NAME_SUFFIX_CLEAN" -v2 "$V2_NAME_CLEAN" -report-path minor-abi-report.html -strict || {
|
||||
ACC_EXIT_CODE=$?
|
||||
echo "abi-compliance-checker -strict failed with exit code $ACC_EXIT_CODE."
|
||||
echo "abi_exit_code=$ACC_EXIT_CODE" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
}
|
||||
|
||||
current_abi_status=$(cat $GITHUB_WORKSPACE/minor_abi_status.txt)
|
||||
current_exit_code=${current_abi_status#*=}
|
||||
|
||||
if [ "$current_exit_code" -eq 0 ] && [ -f minor-abi-report.html ]; then
|
||||
echo "ACC strict check passed. Parsing HTML report for any changes..."
|
||||
CHANGED=0
|
||||
if grep -q "Added Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found added symbols"; fi
|
||||
if grep -q "Removed Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found removed symbols"; fi
|
||||
if grep -q "Problems with.*Data Types.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with data types"; fi
|
||||
if grep -q "Problems with.*Symbols.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with symbols"; fi
|
||||
if grep -q "Problems with.*Constants.*[1-9]" minor-abi-report.html; then CHANGED=1; echo "::warning::STRICT ABI: Found problems with constants"; fi
|
||||
|
||||
if [ "$CHANGED" -eq 1 ]; then
|
||||
echo "::error::STRICT ABI CHECK FAILED: Found changes in ABI report comparing $COMPARE_MSG"
|
||||
echo "abi_exit_code=1" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
else
|
||||
echo "No strict ABI changes found in HTML report."
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
fi
|
||||
elif [ ! -f minor-abi-report.html ] && [ "$current_exit_code" -eq 0 ]; then
|
||||
echo "::warning::Minor ABI report (minor-abi-report.html) not found, but ACC reported success. Assuming no changes."
|
||||
echo "abi_exit_code=0" > $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
elif [ "$current_exit_code" -ne 0 ]; then
|
||||
echo "ACC strict check already indicated failure (exit code $current_exit_code). HTML parsing for further changes skipped or confirmed failure."
|
||||
fi
|
||||
continue-on-error: true
|
||||
|
||||
- name: Display ABI Check Logs (Minor)
|
||||
if: always() && steps.prepare_files_minor.outputs.skip_check_minor == 'false'
|
||||
run: |
|
||||
echo "Displaying Minor ABI compliance check logs (if any)"
|
||||
find logs -type f -name "*.txt" -exec echo "--- {} ---" \; -exec cat {} \; || echo "No .txt logs found in logs/ directory."
|
||||
|
||||
- name: Label PR on Minor ABI Breakage
|
||||
if: always() && github.event_name == 'pull_request'
|
||||
run: |
|
||||
source $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
if [ "$abi_exit_code" -ne 0 ]; then
|
||||
echo "Minor ABI check failed, adding 'MINOR ABI BREAKAGE' label to PR #${{ github.event.pull_request.number }}"
|
||||
gh pr edit ${{ github.event.pull_request.number }} --add-label "MINOR ABI BREAKAGE"
|
||||
fi
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload Minor ABI Report
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: minor-abi-report
|
||||
path: minor-abi-report.html
|
||||
if-no-files-found: ignore
|
||||
|
||||
- name: Report Minor ABI Check Results
|
||||
if: always()
|
||||
run: |
|
||||
echo "Checking Minor ABI check exit code..."
|
||||
source $GITHUB_WORKSPACE/minor_abi_status.txt
|
||||
echo "Minor ABI check exit code: $abi_exit_code"
|
||||
if [ "$abi_exit_code" -ne 0 ]; then
|
||||
echo "::warning::⚠️ MINOR ABI CHANGES FOUND (STRICT CHECK) ⚠️ CHECK \"Run Minor ABI Compliance Check (Strict)\" LOGS OR THE minor-abi-report ARTIFACT FOR DETAILS."
|
||||
else
|
||||
echo "✅ Minor ABI check (Strict) succeeded or found no changes."
|
||||
fi
|
||||
@@ -0,0 +1,836 @@
|
||||
# Workflow: build, install and test AMDSMI on the staging, mainline and
# release branches (pushes and pull requests), or on manual dispatch.
name: AMDSMI CI

on:
  pull_request:
    branches:
      - amd-staging
      - amd-mainline
      - 'release/rocm-rel-*'
  push:
    branches:
      - amd-staging
      - amd-mainline
      - 'release/rocm-rel-*'
  workflow_dispatch:

# The default token only needs to read repository contents.
permissions:
  contents: read

# Environment shared by every job in this workflow.
env:
  DEBIAN_FRONTEND: noninteractive
  DEBCONF_NONINTERACTIVE_SEEN: true
  BUILD_TYPE: Release
  ROCM_DIR: /opt/rocm
|
||||
|
||||
jobs:
|
||||
# Build/install job for the .deb based images. The container image is looked
# up from the repository variable named <os>_DOCKER_IMAGE.
debian-buildinstall:
  name: Build
  runs-on: [self-hosted, "${{ vars.RUNNER_TYPE }}"]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    max-parallel: 10
    matrix:
      os:
        - Ubuntu20
        - Ubuntu22
        - Debian10
  container:
    image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
    options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules

  steps:
    - uses: actions/checkout@v4
|
||||
|
||||
# Debian 10 (buster) packages are served from archive.debian.org, so the apt
# sources are rewritten and the Valid-Until check is disabled before updating.
- name: Update repositories for Debian10
  if: matrix.os == 'Debian10'
  run: |
    set -e
    echo 'Updating repositories for Debian10 (archived)'
    printf '%s\n' \
      'deb http://archive.debian.org/debian buster main' \
      'deb http://archive.debian.org/debian-security buster/updates main' \
      > /etc/apt/sources.list
    echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
    apt update
|
||||
|
||||
# Configure, build and package AMDSMI with up to RETRIES attempts, then
# surface every "warning" line from the build logs as a GitHub annotation.
- name: Build AMDSMI
  # bash is requested explicitly so that `pipefail` is available.
  shell: bash
  run: |
    # pipefail is essential here: every build stage is piped through `tee`,
    # and without it the pipeline status is tee's (always 0), so a failed
    # cmake/make would be reported as a successful build and never retried.
    set -eo pipefail
    echo 'Building on ${{ matrix.os }}'
    BUILD_FOLDER="$GITHUB_WORKSPACE/build"
    RETRIES=3

    for i in $(seq 1 $RETRIES); do
      echo "Build attempt $i for ${{ matrix.os }}..."
      # Every attempt starts from an empty build directory.
      rm -rf "$BUILD_FOLDER"
      mkdir -p "$BUILD_FOLDER"
      cd "$BUILD_FOLDER"

      # Capture build output to parse warnings
      if cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
         make -j "$(nproc)" 2>&1 | tee make.log && \
         make package 2>&1 | tee package.log; then

        # Parse and report warnings as GitHub annotations.
        # grep exits 1 when there are no warnings; that must not abort the
        # script now that pipefail is active.
        echo "::group::Build Warnings"
        { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
          echo "::warning::$line"
        done
        echo "::endgroup::"

        echo "Build successful on attempt $i"
        break
      else
        echo "Build failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES build attempts failed. Exiting."
          exit 1
        fi
        # Linear back-off between attempts.
        sleep $((2 * i))
      fi
    done
    echo "Build completed on ${{ matrix.os }}"
|
||||
|
||||
# Install the locally built .deb (up to RETRIES attempts) and verify that the
# CLI and the Python packages are visible.
- name: Install AMDSMI
  run: |
    cd "$GITHUB_WORKSPACE/build"
    # Debian10 already ran `apt update` in its dedicated repository step.
    if [ "${{ matrix.os }}" != "Debian10" ]; then
      apt update
    fi

    RETRIES=3
    for i in $(seq 1 $RETRIES); do
      echo "Installation attempt $i for ${{ matrix.os }}..."
      if apt install -y ./amd-smi-lib*99999-local_amd64.deb; then
        echo "Installation successful on attempt $i"
        # -f keeps the step idempotent if the link already exists.
        ln -sf /opt/rocm/bin/amd-smi /usr/local/bin/amd-smi

        # Verify Installation
        echo 'Verifying installation:'
        amd-smi version
        python3 -m pip list | grep amd
        python3 -m pip list | grep pip
        python3 -m pip list | grep setuptools
        echo 'Completed installation on ${{ matrix.os }}'
        break
      else
        echo "Installation failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES installation attempts failed. Exiting."
          exit 1
        fi
        sleep $((2 * i))
      fi
    done
    # This is the install step; the previous message said "Build completed".
    echo "Installation completed on ${{ matrix.os }}"
|
||||
|
||||
# Always runs: remove the package (best effort), the CLI symlink and any
# leftover share directory.
- name: Uninstall
  if: always()
  run: |
    set -e
    echo 'Uninstalling on ${{ matrix.os }}'
    apt remove -y amd-smi-lib || true
    rm -f /usr/local/bin/amd-smi
    SHARE_DIR=/opt/rocm/share/amd_smi
    if [ -d "$SHARE_DIR" ]; then
      echo '/opt/rocm/share/amd_smi exists. Removing.'
      rm -rf "$SHARE_DIR"
    fi
    echo 'Uninstall done on ${{ matrix.os }}'
|
||||
|
||||
# Test job for the .deb based images; starts after debian-buildinstall.
debian-test:
  name: Tests
  needs: debian-buildinstall
  runs-on: [self-hosted, "${{ vars.RUNNER_TYPE }}"]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    max-parallel: 10
    matrix:
      os:
        - Ubuntu20
        - Ubuntu22
        - Debian10
  container:
    image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
    options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules

  steps:
    - uses: actions/checkout@v4
|
||||
|
||||
# Debian 10 (buster) packages are served from archive.debian.org, so the apt
# sources are rewritten and the Valid-Until check is disabled before updating.
- name: Update repositories for Debian10
  if: matrix.os == 'Debian10'
  run: |
    set -e
    echo 'Updating repositories for Debian10 (archived)'
    printf '%s\n' \
      'deb http://archive.debian.org/debian buster main' \
      'deb http://archive.debian.org/debian-security buster/updates main' \
      > /etc/apt/sources.list
    echo 'Acquire::Check-Valid-Until "false";' > /etc/apt/apt.conf.d/99-disable-check-valid-until
    apt update
|
||||
|
||||
# Rebuild the package inside the test container and install it. Both phases
# are retried up to RETRIES times with a linear back-off.
- name: Build and Install for Test
  run: |
    set -e
    echo 'Building for test on ${{ matrix.os }}'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=3

    # Phase 1: configure, compile and package from a clean build directory.
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Build attempt $attempt for ${{ matrix.os }} test..."
      rm -rf $BUILD_FOLDER
      mkdir -p $BUILD_FOLDER
      cd $BUILD_FOLDER

      if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
         make -j $(nproc) && \
         make package; then
        echo "Build successful on attempt $attempt"
        break
      fi

      echo "Build failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES build attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done

    # Phase 2: install the freshly built .deb and expose the CLI on PATH.
    echo 'Installing for test on ${{ matrix.os }}'
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Installation attempt $attempt for test on ${{ matrix.os }}..."
      if apt install -y $BUILD_FOLDER/amd-smi-lib*99999-local_amd64.deb; then
        echo "Installation successful on attempt $attempt"
        ln -s /opt/rocm/bin/amd-smi /usr/local/bin
        echo 'Install done for test on ${{ matrix.os }}'
        break
      fi

      echo "Installation failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES installation attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
|
||||
|
||||
# Smoke-test each amd-smi subcommand with debug logging. Output goes to one
# log file per command; the first failing command prints its log and fails
# the step.
- name: AMDSMI Command Tests
  shell: bash
  run: |
    set -e
    echo "Running AMDSMI commands on ${{ matrix.os }}"
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
    mkdir -p "$RESULTS_DIR"
    commands=(
      "amd-smi version"
      "amd-smi list"
      "amd-smi static"
      "amd-smi firmware"
      "amd-smi ucode"
      "amd-smi bad-pages"
      "amd-smi metric"
      "amd-smi process"
      "amd-smi topology"
      "amd-smi monitor"
      "amd-smi dmon"
      "amd-smi xgmi"
      "amd-smi partition"
    )
    for cmd in "${commands[@]}"; do
      debug_cmd="$cmd --loglevel debug"
      # Log name is the command with spaces replaced by underscores.
      log_file="$RESULTS_DIR/${cmd// /_}.log"
      echo "Running: $debug_cmd"
      if eval "$debug_cmd" > "$log_file" 2>&1; then
        echo "$debug_cmd passed."
      else
        echo "Command '$debug_cmd' failed."
        cat "$log_file"
        exit 1
      fi
    done
    echo "AMDSMI commands done on ${{ matrix.os }}"
|
||||
|
||||
# Publish the per-OS results directory as an artifact, even after failures.
- name: Upload AMDSMI Command Test Results
  uses: actions/upload-artifact@v4
  if: always()
  with:
    path: /tmp/test-results-${{ matrix.os }}
    name: amdsmi-command-tests-${{ matrix.os }}
|
||||
|
||||
# Runs, in order: the amdsmitst gtest binary (with retries), the Python
# integration and unit tests, and the two example programs. Logs are written
# to /tmp/test-results-<os>/ for the later display steps.
- name: Run AMDSMI, Python, and Example Tests
  shell: bash
  run: |
    set -e
    echo 'Running other tests on ${{ matrix.os }}'
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}

    # Print only the gtest summary lines of a log. With `shell: bash` the
    # script runs under `-eo pipefail`, so a log without any summary line
    # (grep exit 1) would abort the step and hide the real test exit code;
    # `|| true` keeps the summary purely informational.
    print_gtest_summary() {
      echo "=============== TEST OUTPUT ==============="
      grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" "$1" || true
      echo "=============================================="
    }

    # AMDSMI Tests
    echo 'Running AMDSMI tests'
    cd /opt/rocm/share/amd_smi/tests
    # Sourced for BLACKLIST_ALL_ASICS, which is passed to gtest as a
    # negative filter below.
    source amdsmitst.exclude

    AMDSMI_RETRIES=3
    for attempt in $(seq 1 $AMDSMI_RETRIES); do
      echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
      if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
        echo "AMDSMI tests passed on attempt $attempt"
        print_gtest_summary "$RESULTS_DIR/amdsmi_tests.log"
        echo "AMDSMI tests done"
        break
      else
        # In the else branch, $? still holds the status of the test binary.
        TEST_EXIT_CODE=$?
        echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
        if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
          echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
          print_gtest_summary "$RESULTS_DIR/amdsmi_tests.log"
          echo "AMDSMI tests failed"
          exit $TEST_EXIT_CODE
        else
          echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
          sleep $((2 * attempt))
        fi
      fi
    done

    # Python Tests
    echo 'Running Python tests'
    cd /opt/rocm/share/amd_smi/tests/python_unittest
    echo "Running integration tests..."
    if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
      echo "Integration tests failed!"
      echo "=============== INTEGRATION TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/integration_test_output.txt"
      echo "======================================================="
      exit 1
    else
      echo "Integration tests passed"
    fi

    echo "Running unit tests..."
    if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
      echo "Unit tests failed!"
      echo "=============== UNIT TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/unit_test_output.txt"
      echo "================================================"
      exit 1
    else
      echo "Unit tests passed"
    fi

    echo "Python tests done"

    # Example Tests
    echo 'Running Example tests'
    cd "$GITHUB_WORKSPACE/example"
    rm -rf build
    cmake -B build -DENABLE_ESMI_LIB=OFF
    make -C build -j "$(nproc)"
    cd build
    # Example failures are only reported, they do not fail the step.
    ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
    ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
    echo "Example tests done"
|
||||
|
||||
# The five steps below always run and print one result log each; a missing
# log is reported with a message instead of failing the step.
- name: AMDSMI Test Results
  if: always()
  run: |
    echo "Displaying AMDSMI test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amdsmi_tests.log
    cat "$LOG_FILE" || echo "No AMDSMI test results found for ${{ matrix.os }}"

- name: Integration Test Results
  if: always()
  run: |
    echo "Displaying Integration test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/integration_test_output.txt
    cat "$LOG_FILE" || echo "No integration test results found for ${{ matrix.os }}"

- name: Unit Test Results
  if: always()
  run: |
    echo "Displaying Unit Test Results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/unit_test_output.txt
    cat "$LOG_FILE" || echo "No unit test results found for ${{ matrix.os }}"

- name: Example DRM Test Results
  if: always()
  run: |
    echo "Displaying Example DRM test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log
    cat "$LOG_FILE" || echo "No DRM example test results found for ${{ matrix.os }}"

- name: Example NoDRM Test Results
  if: always()
  run: |
    echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log
    cat "$LOG_FILE" || echo "No NoDRM example test results found for ${{ matrix.os }}"
|
||||
|
||||
# Build/install job for the .rpm based images. The container image is looked
# up from the repository variable named <os>_DOCKER_IMAGE.
rpm-buildinstall:
  name: Build
  runs-on: [self-hosted, "${{ vars.RUNNER_TYPE }}"]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    max-parallel: 10
    matrix:
      os: [SLES, RHEL8, RHEL9, RHEL10, AzureLinux3, AlmaLinux8]
  container:
    image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
    options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules

  steps:
    - uses: actions/checkout@v4
|
||||
|
||||
# Export PACKAGE_MANAGER (zypper or dnf) for the later install steps.
- name: Set PkgMgr
  run: |
    set -e
    case "${{ matrix.os }}" in
      SLES)
        echo "PACKAGE_MANAGER=zypper" >> "$GITHUB_ENV"
        ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"
        ;;
      *)
        # Fail here with a clear message rather than leaving the variable
        # unset for the steps that read env.PACKAGE_MANAGER.
        echo "::error::No package manager mapping for '${{ matrix.os }}'"
        exit 1
        ;;
    esac
|
||||
|
||||
# AzureLinux3 only: install the more_itertools Python module via pip.
- name: Add more_itertools
  if: ${{ matrix.os == 'AzureLinux3' }}
  run: |
    set -e
    echo 'Installing more_itertools on ${{ matrix.os }}'
    python3 -m pip install more_itertools
|
||||
|
||||
# RHEL10 / AlmaLinux8 build: same cmake + make + package sequence as the
# generic build, with QA_RPATHS exported and up to five attempts.
- name: Build AMDSMI(RHEL10 & AlmaLinux8)
  if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
  run: |
    set -e
    echo 'Building on ${{ matrix.os }} with retries and QA_RPATHS'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=5

    # QA_RPATHS is a bit mask: 0x0010 (empty RPATH) | 0x0002 (invalid RPATH)
    # are the checks to ignore during packaging.
    export QA_RPATHS=$((0x0010 | 0x0002))

    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Build attempt $attempt for ${{ matrix.os }} ..."
      rm -rf $BUILD_FOLDER
      mkdir -p $BUILD_FOLDER
      cd $BUILD_FOLDER

      if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
         make -j $(nproc) && \
         make package; then
        echo "Build successful on attempt $attempt"
        break
      fi

      echo "Build failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES build attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
    echo "Build completed on ${{ matrix.os }}"
|
||||
|
||||
# Configure, build and package AMDSMI with up to RETRIES attempts, then
# surface every "warning" line from the build logs as a GitHub annotation.
- name: Build AMDSMI
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  # bash is requested explicitly so that `pipefail` is available.
  shell: bash
  run: |
    # pipefail is essential here: every build stage is piped through `tee`,
    # and without it the pipeline status is tee's (always 0), so a failed
    # cmake/make would be reported as a successful build and never retried.
    set -eo pipefail
    echo 'Building on ${{ matrix.os }}'
    BUILD_FOLDER="$GITHUB_WORKSPACE/build"
    RETRIES=3

    for i in $(seq 1 $RETRIES); do
      echo "Build attempt $i for ${{ matrix.os }}..."
      # Every attempt starts from an empty build directory.
      rm -rf "$BUILD_FOLDER"
      mkdir -p "$BUILD_FOLDER"
      cd "$BUILD_FOLDER"

      # Capture build output to parse warnings
      if cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON 2>&1 | tee cmake.log && \
         make -j "$(nproc)" 2>&1 | tee make.log && \
         make package 2>&1 | tee package.log; then

        # Parse and report warnings as GitHub annotations.
        # grep exits 1 when there are no warnings; that must not abort the
        # script now that pipefail is active.
        echo "::group::Build Warnings"
        { grep -i "warning" cmake.log make.log package.log || true; } | while read -r line; do
          echo "::warning::$line"
        done
        echo "::endgroup::"

        echo "Build successful on attempt $i"
        break
      else
        echo "Build failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES build attempts failed. Exiting."
          exit 1
        fi
        # Linear back-off between attempts.
        sleep $((2 * i))
      fi
    done
    echo "Build completed on ${{ matrix.os }}"
|
||||
|
||||
# Install the locally built RPM on RHEL10 / AlmaLinux8 (up to RETRIES
# attempts) and verify that the CLI and the Python packages are visible.
- name: Install AMDSMI(RHEL10 & AlmaLinux8)
  if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
  run: |
    cd "$GITHUB_WORKSPACE/build"
    dnf install python3-setuptools python3-wheel -y

    RETRIES=3
    for i in $(seq 1 $RETRIES); do
      # Log the actual matrix entry: this step also runs for AlmaLinux8, so
      # a hard-coded "RHEL10" prefix is misleading.
      echo "${{ matrix.os }}: Installation attempt $i..."
      # '*' is quoted so the shell can never expand it against local files.
      if timeout 10m dnf install -y --skip-broken --disablerepo='*' ./amd-smi-lib-*99999-local*.rpm; then
        echo "Installation successful on attempt $i"
        # -f keeps the step idempotent if the link already exists.
        ln -sf /opt/rocm/bin/amd-smi /usr/local/bin/amd-smi

        echo 'Verifying installation:'
        amd-smi version
        python3 -m pip list | grep amd
        python3 -m pip list | grep pip
        python3 -m pip list | grep setuptools
        echo 'Completed installation on ${{ matrix.os }}'
        break
      else
        echo "Installation failed on attempt $i"
        if [ "$i" -eq "$RETRIES" ]; then
          echo "All $RETRIES installation attempts failed. Exiting."
          exit 1
        fi
        sleep $((2 * i))
      fi
    done
|
||||
|
||||
# Install the locally built RPM with the package manager selected by the
# "Set PkgMgr" step, then verify the CLI and the Python packages.
- name: Install AMDSMI
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  run: |
    cd "$GITHUB_WORKSPACE/build"
    # The subject is quoted so an empty PACKAGE_MANAGER reaches the default
    # branch instead of producing a shell syntax error.
    case "${{ env.PACKAGE_MANAGER }}" in
      zypper)
        timeout 10m zypper --no-refresh --no-gpg-checks install -y ./amd-smi-lib-*99999-local*.rpm
        ;;
      dnf)
        dnf install python3-setuptools python3-wheel -y
        RETRIES=3
        for i in $(seq 1 $RETRIES); do
          echo "Attempt $i: Installing AMDSMI package..."
          # '*' is quoted so the shell can never expand it against local files.
          if timeout 10m dnf install -y --skip-broken --disablerepo='*' ./amd-smi-lib-*99999-local*.rpm; then
            echo "AMDSMI package installed successfully."
            break
          else
            echo "Installation failed on attempt $i. Retrying..."
            if [ "$i" -eq "$RETRIES" ]; then
              echo "All $RETRIES attempts failed. Exiting."
              exit 1
            fi
            sleep 10
          fi
        done
        ;;
      *)
        # Without this branch nothing would be installed and the step would
        # only fail later, at the verification commands.
        echo "::error::Unsupported PACKAGE_MANAGER '${{ env.PACKAGE_MANAGER }}' on ${{ matrix.os }}"
        exit 1
        ;;
    esac
    # -f keeps the step idempotent if the link already exists.
    ln -sf /opt/rocm/bin/amd-smi /usr/local/bin/amd-smi

    # Verify Installation
    echo 'Verifying installation:'
    amd-smi version
    python3 -m pip list | grep amd
    python3 -m pip list | grep pip
    python3 -m pip list | grep setuptools
    echo 'Completed installation on ${{ matrix.os }}'
|
||||
|
||||
# Always runs: remove the package with the OS package manager (best effort),
# then the CLI symlink and any leftover share directory.
- name: Uninstall
  if: always()
  run: |
    set -e
    echo 'Uninstalling on ${{ matrix.os }}'
    case ${{ matrix.os }} in
      SLES)
        zypper remove -y amd-smi-lib || true ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
        dnf remove -y amd-smi-lib || true ;;
    esac
    rm -f /usr/local/bin/amd-smi
    SHARE_DIR=/opt/rocm/share/amd_smi
    if [ -d "$SHARE_DIR" ]; then
      echo '/opt/rocm/share/amd_smi exists. Removing.'
      rm -rf "$SHARE_DIR"
    fi
    echo 'Uninstall done on ${{ matrix.os }}'
|
||||
|
||||
# Test job for the .rpm based images.
rpm-test:
  name: Tests
  # debian-test is needed to complete before rpm-test starts (see comment about driver reloads)
  needs: [rpm-buildinstall, debian-test]
  runs-on: [self-hosted, "${{ vars.RUNNER_TYPE }}"]
  # A failing matrix entry does not fail the workflow run.
  continue-on-error: true
  strategy:
    max-parallel: 10
    matrix:
      os: [SLES, RHEL8, RHEL9, RHEL10, AzureLinux3, AlmaLinux8]
  container:
    image: ${{ vars[format('{0}_DOCKER_IMAGE', matrix.os)] }}
    options: --rm --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --shm-size=64G --cap-add=SYS_MODULE -v /lib/modules:/lib/modules

  steps:
    - uses: actions/checkout@v4
|
||||
|
||||
# Export PACKAGE_MANAGER (zypper or dnf) for the later install steps.
- name: Set PkgMgr
  run: |
    set -e
    case "${{ matrix.os }}" in
      SLES)
        echo "PACKAGE_MANAGER=zypper" >> "$GITHUB_ENV"
        ;;
      RHEL8|RHEL9|RHEL10|AlmaLinux8|AzureLinux3)
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"
        ;;
      *)
        # Fail here with a clear message rather than leaving the variable
        # unset for the steps that read env.PACKAGE_MANAGER.
        echo "::error::No package manager mapping for '${{ matrix.os }}'"
        exit 1
        ;;
    esac
|
||||
|
||||
# AzureLinux3 only: install the more_itertools Python module via pip.
- name: Add more_itertools
  if: ${{ matrix.os == 'AzureLinux3' }}
  run: |
    set -e
    echo 'Installing more_itertools on ${{ matrix.os }}'
    python3 -m pip install more_itertools
|
||||
|
||||
# RHEL10 / AlmaLinux8: rebuild the package inside the test container and
# install it. Both phases share RETRIES (5) and a linear back-off.
- name: Build and Install for Tests (RHEL10 & AlmaLinux8)
  if: matrix.os == 'RHEL10' || matrix.os == 'AlmaLinux8'
  run: |
    set -e
    echo 'Building for test on RHEL10/AlmaLinux8 with retries and QA_RPATHS'
    BUILD_FOLDER=$GITHUB_WORKSPACE/build
    RETRIES=5

    # Set QA_RPATHS to ignore empty (0x0010) and invalid (0x0002) RPATHs
    export QA_RPATHS=$((0x0010 | 0x0002))

    # Phase 1: configure, compile and package from a clean build directory.
    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "Build attempt $attempt for RHEL10/AlmaLinux8 test..."
      rm -rf $BUILD_FOLDER
      mkdir -p $BUILD_FOLDER
      cd $BUILD_FOLDER

      if cmake $GITHUB_WORKSPACE -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
         make -j $(nproc) && \
         make package; then
        echo "Build successful on attempt $attempt"
        break
      fi

      echo "Build failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES build attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done

    # Phase 2: install the freshly built RPM and expose the CLI on PATH.
    echo 'Installing for test on RHEL10/AlmaLinux8'
    dnf install python3-setuptools python3-wheel -y

    attempt=1
    while [ "$attempt" -le "$RETRIES" ]; do
      echo "RHEL10/AlmaLinux8: Installation attempt $attempt for test..."
      if timeout 10m dnf install -y --skip-broken --disablerepo=* $BUILD_FOLDER/amd-smi-lib-*99999-local*.rpm; then
        echo "Installation successful on attempt $attempt"
        ln -s /opt/rocm/bin/amd-smi /usr/local/bin
        echo 'Install done for test on RHEL10/AlmaLinux8'
        break
      fi

      echo "Installation failed on attempt $attempt"
      if [ "$attempt" -eq "$RETRIES" ]; then
        echo "All $RETRIES installation attempts failed. Exiting."
        exit 1
      fi
      sleep $((2 * attempt))
      attempt=$((attempt + 1))
    done
|
||||
|
||||
# Rebuild the package inside the test container (single attempt) and install
# it with the package manager selected by the "Set PkgMgr" step.
- name: Build and Install for Tests
  if: matrix.os != 'RHEL10' && matrix.os != 'AlmaLinux8'
  run: |
    set -e
    echo 'Building for test on ${{ matrix.os }}'
    BUILD_FOLDER="$GITHUB_WORKSPACE/build"
    rm -rf "$BUILD_FOLDER"
    mkdir -p "$BUILD_FOLDER"
    cd "$BUILD_FOLDER"
    cmake "$GITHUB_WORKSPACE" -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON
    make -j "$(nproc)"
    make package

    echo 'Installing for test on ${{ matrix.os }}'
    # The subject is quoted so an empty PACKAGE_MANAGER reaches the default
    # branch instead of producing a shell syntax error.
    case "${{ env.PACKAGE_MANAGER }}" in
      zypper)
        timeout 10m zypper --no-refresh --no-gpg-checks install -y "$BUILD_FOLDER"/amd-smi-lib-*99999-local*.rpm
        ;;
      dnf)
        dnf install python3-setuptools python3-wheel -y
        RETRIES=3
        for i in $(seq 1 $RETRIES); do
          echo "Attempt $i: Installing..."
          # '*' is quoted so the shell can never expand it against local files.
          if timeout 10m dnf install -y --skip-broken --disablerepo='*' "$BUILD_FOLDER"/amd-smi-lib-*99999-local*.rpm; then
            echo "Install successful."
            break
          else
            echo "Attempt $i failed. Retrying..."
            if [ "$i" -eq "$RETRIES" ]; then
              echo "All attempts failed."
              exit 1
            fi
            sleep 10
          fi
        done
        ;;
      *)
        # Without this branch nothing would be installed and only a dangling
        # symlink would be created below.
        echo "::error::Unsupported PACKAGE_MANAGER '${{ env.PACKAGE_MANAGER }}' on ${{ matrix.os }}"
        exit 1
        ;;
    esac
    # -f keeps the step idempotent if the link already exists.
    ln -sf /opt/rocm/bin/amd-smi /usr/local/bin/amd-smi
    echo 'Install done for test on ${{ matrix.os }}'
|
||||
|
||||
# Smoke-test each amd-smi subcommand with debug logging. Output goes to one
# log file per command; the first failing command prints its log and fails
# the step.
- name: AMDSMI Command Tests
  shell: bash
  run: |
    set -e
    echo "Running AMDSMI commands on ${{ matrix.os }}"
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}
    mkdir -p "$RESULTS_DIR"
    commands=(
      "amd-smi version"
      "amd-smi list"
      "amd-smi static"
      "amd-smi firmware"
      "amd-smi ucode"
      "amd-smi bad-pages"
      "amd-smi metric"
      "amd-smi process"
      "amd-smi topology"
      "amd-smi monitor"
      "amd-smi dmon"
      "amd-smi xgmi"
      "amd-smi partition"
    )
    for cmd in "${commands[@]}"; do
      debug_cmd="$cmd --loglevel debug"
      # Log name is the command with spaces replaced by underscores.
      log_file="$RESULTS_DIR/${cmd// /_}.log"
      echo "Running: $debug_cmd"
      if eval "$debug_cmd" > "$log_file" 2>&1; then
        echo "$debug_cmd passed."
      else
        echo "Command '$debug_cmd' failed."
        cat "$log_file"
        exit 1
      fi
    done
    echo "AMDSMI commands done on ${{ matrix.os }}"
|
||||
|
||||
# Publish the per-OS results directory as an artifact, even after failures.
- name: Upload AMDSMI Command Test Results
  uses: actions/upload-artifact@v4
  if: always()
  with:
    path: /tmp/test-results-${{ matrix.os }}
    name: amdsmi-command-tests-${{ matrix.os }}
|
||||
|
||||
# Runs, in order: the amdsmitst gtest binary (with retries), the Python
# integration and unit tests, and the two example programs. Logs are written
# to /tmp/test-results-<os>/ for the later display steps.
- name: Run AMDSMI, Python, and Example Tests
  shell: bash
  run: |
    set -e
    echo 'Running other tests on ${{ matrix.os }}'
    RESULTS_DIR=/tmp/test-results-${{ matrix.os }}

    # Print only the gtest summary lines of a log. With `shell: bash` the
    # script runs under `-eo pipefail`, so a log without any summary line
    # (grep exit 1) would abort the step and hide the real test exit code;
    # `|| true` keeps the summary purely informational.
    print_gtest_summary() {
      echo "=============== TEST OUTPUT ==============="
      grep -E "\[==========\]|\[ PASSED \]|\[ SKIPPED \]|\[ FAILED \]" "$1" || true
      echo "=============================================="
    }

    # AMDSMI Tests
    echo 'Running AMDSMI tests'
    cd /opt/rocm/share/amd_smi/tests
    # Sourced for BLACKLIST_ALL_ASICS, which is passed to gtest as a
    # negative filter below.
    source amdsmitst.exclude

    AMDSMI_RETRIES=3
    for attempt in $(seq 1 $AMDSMI_RETRIES); do
      echo "AMDSMI test attempt $attempt for ${{ matrix.os }}..."
      if ./amdsmitst --gtest_filter="-$(echo ${BLACKLIST_ALL_ASICS})" > "$RESULTS_DIR/amdsmi_tests.log" 2>&1; then
        echo "AMDSMI tests passed on attempt $attempt"
        print_gtest_summary "$RESULTS_DIR/amdsmi_tests.log"
        echo "AMDSMI tests done"
        break
      else
        # In the else branch, $? still holds the status of the test binary.
        TEST_EXIT_CODE=$?
        echo "AMDSMI tests failed on attempt $attempt with exit code $TEST_EXIT_CODE"
        if [ "$attempt" -eq "$AMDSMI_RETRIES" ]; then
          echo "All $AMDSMI_RETRIES AMDSMI test attempts failed. Final failure."
          print_gtest_summary "$RESULTS_DIR/amdsmi_tests.log"
          echo "AMDSMI tests failed"
          exit $TEST_EXIT_CODE
        else
          echo "Retrying AMDSMI tests in $((2 * attempt)) seconds..."
          sleep $((2 * attempt))
        fi
      fi
    done

    # Python Tests
    echo 'Running Python tests'
    cd /opt/rocm/share/amd_smi/tests/python_unittest
    echo "Running integration tests..."
    if ! ./integration_test.py -v > "$RESULTS_DIR/integration_test_output.txt" 2>&1; then
      echo "Integration tests failed!"
      echo "=============== INTEGRATION TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/integration_test_output.txt"
      echo "======================================================="
      exit 1
    else
      echo "Integration tests passed"
    fi

    echo "Running unit tests..."
    if ! ./unit_tests.py -v > "$RESULTS_DIR/unit_test_output.txt" 2>&1; then
      echo "Unit tests failed!"
      echo "=============== UNIT TEST OUTPUT ==============="
      tail -100 "$RESULTS_DIR/unit_test_output.txt"
      echo "================================================"
      exit 1
    else
      echo "Unit tests passed"
    fi

    echo "Python tests done"

    # Example Tests
    echo 'Running Example tests'
    cd "$GITHUB_WORKSPACE/example"
    rm -rf build
    cmake -B build -DENABLE_ESMI_LIB=OFF
    make -C build -j "$(nproc)"
    cd build
    # Example failures are only reported, they do not fail the step.
    ./amd_smi_drm_ex > "$RESULTS_DIR/amd_smi_drm_ex.log" 2>&1 || echo 'amd_smi_drm_ex failed'
    ./amd_smi_nodrm_ex > "$RESULTS_DIR/amd_smi_nodrm_ex.log" 2>&1 || echo 'amd_smi_nodrm_ex failed'
    echo "Example tests done"
|
||||
|
||||
# The five steps below always run and print one result log each; a missing
# log is reported with a message instead of failing the step.
- name: AMDSMI Test Results
  if: always()
  run: |
    echo "Displaying AMDSMI test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amdsmi_tests.log
    cat "$LOG_FILE" || echo "No AMDSMI test results found for ${{ matrix.os }}"

- name: Integration Test Results
  if: always()
  run: |
    echo "Displaying Integration test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/integration_test_output.txt
    cat "$LOG_FILE" || echo "No integration test results found for ${{ matrix.os }}"

- name: Unit Test Results
  if: always()
  run: |
    echo "Displaying Unit Test Results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/unit_test_output.txt
    cat "$LOG_FILE" || echo "No unit test results found for ${{ matrix.os }}"

- name: Example DRM Test Results
  if: always()
  run: |
    echo "Displaying Example DRM test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amd_smi_drm_ex.log
    cat "$LOG_FILE" || echo "No DRM example test results found for ${{ matrix.os }}"

- name: Example NoDRM Test Results
  if: always()
  run: |
    echo "Displaying Example NoDRM test results for ${{ matrix.os }}"
    LOG_FILE=/tmp/test-results-${{ matrix.os }}/amd_smi_nodrm_ex.log
    cat "$LOG_FILE" || echo "No NoDRM example test results found for ${{ matrix.os }}"
|
||||
+319
@@ -0,0 +1,319 @@
|
||||
# Workflow: label pull requests. Triggered by PR lifecycle events and by the
# completion of the "ABI Compliance Check" workflow.
name: Auto Label PRs

on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - closed
  workflow_run:
    workflows:
      - "ABI Compliance Check"
    types:
      - completed
|
||||
|
||||
jobs:
|
||||
apply-labels:
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
permissions:
|
||||
pull-requests: write
|
||||
actions: read
|
||||
contents: read
|
||||
steps:
|
||||
- name: Add/Remove labels based on branch names and ABI results
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const pr = context.payload.pull_request;
|
||||
let prNumber, headSha, baseBranch, headBranch;
|
||||
|
||||
// Handle different event types
|
||||
if (context.eventName === 'pull_request') {
|
||||
prNumber = pr.number;
|
||||
headSha = pr.head.sha;
|
||||
baseBranch = pr.base.ref;
|
||||
headBranch = pr.head.ref;
|
||||
} else if (context.eventName === 'workflow_run') {
|
||||
// Find the associated PR for workflow_run events
|
||||
const workflowRun = context.payload.workflow_run;
|
||||
console.log(`Workflow run completed: ${workflowRun.name} with conclusion: ${workflowRun.conclusion}`);
|
||||
|
||||
if (workflowRun.event !== 'pull_request') {
|
||||
console.log('Workflow run was not triggered by a pull request, skipping');
|
||||
return;
|
||||
}
|
||||
|
||||
const prs = await github.rest.pulls.list({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
state: 'open',
|
||||
head: `${context.repo.owner}:${workflowRun.head_branch}`
|
||||
});
|
||||
|
||||
const associatedPr = prs.data.find(p => p.head.sha === workflowRun.head_sha);
|
||||
|
||||
if (!associatedPr) {
|
||||
console.log('No associated PR found for this workflow run');
|
||||
return;
|
||||
}
|
||||
|
||||
prNumber = associatedPr.number;
|
||||
headSha = associatedPr.head.sha;
|
||||
baseBranch = associatedPr.base.ref;
|
||||
headBranch = associatedPr.head.ref;
|
||||
} else {
|
||||
console.log('Unsupported event type');
|
||||
return;
|
||||
}
|
||||
|
||||
let labelsApplied = false;
|
||||
|
||||
// Debug information
|
||||
console.log(`Processing PR #${prNumber}: Head: ${headBranch}, Base: ${baseBranch}`);
|
||||
|
||||
// Get current PR data to check existing labels
|
||||
const { data: currentPr } = await github.rest.pulls.get({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
pull_number: prNumber
|
||||
});
|
||||
const existingLabels = currentPr.labels.map(label => label.name);
|
||||
|
||||
// Condition 1: PR targeting amd-mainline
|
||||
if (baseBranch === 'amd-mainline' && context.eventName === 'pull_request') {
|
||||
const labelToAdd = 'Merge amd-mainline';
|
||||
try {
|
||||
if (!existingLabels.includes(labelToAdd)) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
labels: [labelToAdd]
|
||||
});
|
||||
console.log(`Added label "${labelToAdd}" to PR #${prNumber}`);
|
||||
labelsApplied = true;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error adding label "${labelToAdd}": ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Condition 2: Cherry-pick based on head branch name or release target
|
||||
if (context.eventName === 'pull_request') {
|
||||
const isCherryPickHead = /cherry.*pick/i.test(headBranch);
|
||||
const isReleaseTargetBase = baseBranch.startsWith('release/');
|
||||
|
||||
if (isCherryPickHead || isReleaseTargetBase) {
|
||||
const labelToAdd = 'cherry-pick';
|
||||
try {
|
||||
if (!existingLabels.includes(labelToAdd)) {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
labels: [labelToAdd]
|
||||
});
|
||||
console.log(`Added label "${labelToAdd}" to PR #${prNumber}`);
|
||||
labelsApplied = true;
|
||||
} else {
|
||||
console.log(`Label "${labelToAdd}" already exists on PR #${prNumber}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Error adding label "${labelToAdd}": ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ABI BREAKAGE LOGIC: Check on both workflow_run AND pull_request events
|
||||
let shouldCheckABI = false;
|
||||
let hasMajorAbiBreakage = false;
|
||||
let hasMinorAbiBreakage = false;
|
||||
|
||||
if (context.eventName === 'workflow_run') {
|
||||
// Handle workflow_run events (existing logic)
|
||||
const workflowRun = context.payload.workflow_run;
|
||||
|
||||
if (workflowRun.name === 'ABI Compliance Check') {
|
||||
shouldCheckABI = true;
|
||||
console.log(`ABI Compliance Check completed with conclusion: ${workflowRun.conclusion}`);
|
||||
|
||||
try {
|
||||
const { data: jobs } = await github.rest.actions.listJobsForWorkflowRun({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
run_id: workflowRun.id
|
||||
});
|
||||
|
||||
// Check job conclusions for ABI breakage
|
||||
for (const job of jobs.jobs) {
|
||||
console.log(`Job: ${job.name}, Conclusion: ${job.conclusion}`);
|
||||
|
||||
if (job.name.includes('Major ABI') && job.conclusion === 'failure') {
|
||||
hasMajorAbiBreakage = true;
|
||||
console.log('Major ABI breakage detected from job failure');
|
||||
}
|
||||
|
||||
if (job.name.includes('Minor ABI') && job.conclusion === 'failure') {
|
||||
hasMinorAbiBreakage = true;
|
||||
console.log('Minor ABI breakage detected from job failure');
|
||||
}
|
||||
}
|
||||
|
||||
// If workflow succeeded, no ABI breakage
|
||||
if (workflowRun.conclusion === 'success') {
|
||||
console.log('ABI Compliance Check succeeded - no ABI breakage');
|
||||
hasMajorAbiBreakage = false;
|
||||
hasMinorAbiBreakage = false;
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.log(`Could not fetch job details: ${error.message}`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else if (context.eventName === 'pull_request') {
|
||||
// NEW: Check if amdsmi.h has been reverted on PR events
|
||||
const hasAbiLabels = existingLabels.includes('MAJOR ABI BREAKAGE') || existingLabels.includes('MINOR ABI BREAKAGE');
|
||||
|
||||
if (hasAbiLabels) {
|
||||
console.log('PR has ABI labels, checking if amdsmi.h changes were reverted...');
|
||||
shouldCheckABI = true;
|
||||
|
||||
try {
|
||||
// Get the diff for amdsmi.h between base and head
|
||||
const { data: comparison } = await github.rest.repos.compareCommits({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
base: currentPr.base.sha,
|
||||
head: currentPr.head.sha
|
||||
});
|
||||
|
||||
// Check if amdsmi.h has any changes
|
||||
const amdsmiFile = comparison.files?.find(file => file.filename === 'include/amd_smi/amdsmi.h');
|
||||
|
||||
if (!amdsmiFile) {
|
||||
console.log('No changes to amdsmi.h found in this PR - removing ABI labels');
|
||||
hasMajorAbiBreakage = false;
|
||||
hasMinorAbiBreakage = false;
|
||||
} else if (amdsmiFile.changes === 0) {
|
||||
console.log('amdsmi.h file exists but has no changes - removing ABI labels');
|
||||
hasMajorAbiBreakage = false;
|
||||
hasMinorAbiBreakage = false;
|
||||
} else {
|
||||
console.log(`amdsmi.h has ${amdsmiFile.changes} changes - keeping existing ABI labels`);
|
||||
// Keep existing labels since we can't determine ABI status without running the check
|
||||
hasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
|
||||
hasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.log(`Error checking file changes: ${error.message}`);
|
||||
// If we can't check, preserve existing labels
|
||||
hasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
|
||||
hasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Manage ABI breakage labels (only if we determined ABI status)
|
||||
if (shouldCheckABI) {
|
||||
const abiLabels = {
|
||||
'MAJOR ABI BREAKAGE': hasMajorAbiBreakage,
|
||||
'MINOR ABI BREAKAGE': hasMinorAbiBreakage
|
||||
};
|
||||
|
||||
const wasMajorAbiBreakage = existingLabels.includes('MAJOR ABI BREAKAGE');
|
||||
const wasMinorAbiBreakage = existingLabels.includes('MINOR ABI BREAKAGE');
|
||||
|
||||
for (const [labelName, shouldHaveLabel] of Object.entries(abiLabels)) {
|
||||
const hasLabel = existingLabels.includes(labelName);
|
||||
|
||||
if (shouldHaveLabel && !hasLabel) {
|
||||
// Add label
|
||||
try {
|
||||
await github.rest.issues.addLabels({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
labels: [labelName]
|
||||
});
|
||||
console.log(`✅ Added label "${labelName}" to PR #${prNumber}`);
|
||||
labelsApplied = true;
|
||||
} catch (error) {
|
||||
console.error(`❌ Error adding label "${labelName}": ${error.message}`);
|
||||
}
|
||||
} else if (!shouldHaveLabel && hasLabel) {
|
||||
// Remove label
|
||||
try {
|
||||
await github.rest.issues.removeLabel({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
name: labelName
|
||||
});
|
||||
console.log(`🗑️ Removed label "${labelName}" from PR #${prNumber}`);
|
||||
labelsApplied = true;
|
||||
} catch (error) {
|
||||
console.error(`❌ Error removing label "${labelName}": ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add comments when ABI issues are detected or resolved
|
||||
if (context.eventName === 'workflow_run') {
|
||||
// Only add comments for workflow_run events (actual ABI check results)
|
||||
if (hasMajorAbiBreakage && !wasMajorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '⚠️ **MAJOR ABI BREAKAGE detected** in the latest ABI compliance check. Please review the ABI compliance report and fix any breaking changes.'
|
||||
});
|
||||
}
|
||||
|
||||
if (hasMinorAbiBreakage && !wasMinorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '⚠️ **MINOR ABI BREAKAGE detected** in the latest ABI compliance check. Please review the ABI compliance report for details.'
|
||||
});
|
||||
}
|
||||
|
||||
if (!hasMajorAbiBreakage && wasMajorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '✅ **MAJOR ABI BREAKAGE resolved** - ABI compliance check is now passing!'
|
||||
});
|
||||
}
|
||||
|
||||
if (!hasMinorAbiBreakage && wasMinorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '✅ **MINOR ABI BREAKAGE resolved** - ABI compliance check is now passing!'
|
||||
});
|
||||
}
|
||||
} else if (context.eventName === 'pull_request') {
|
||||
// Add comment when labels are removed due to file reversion
|
||||
if (!hasMajorAbiBreakage && wasMajorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '✅ **MAJOR ABI BREAKAGE resolved** - `amdsmi.h` changes have been reverted.'
|
||||
});
|
||||
}
|
||||
|
||||
if (!hasMinorAbiBreakage && wasMinorAbiBreakage) {
|
||||
await github.rest.issues.createComment({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
issue_number: prNumber,
|
||||
body: '✅ **MINOR ABI BREAKAGE resolved** - `amdsmi.h` changes have been reverted.'
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!labelsApplied && context.eventName === 'pull_request') {
|
||||
console.log(`PR #${prNumber} did not match criteria for automatic labeling by this workflow.`);
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
# caution: most of this file was written using Claude 3.7 Sonnet
|
||||
name: CMake Format Check
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-staging ]
|
||||
paths:
|
||||
- '**/*.cmake'
|
||||
- '**/CMakeLists.txt'
|
||||
- '**/*.cmake.in'
|
||||
pull_request:
|
||||
branches: [ amd-staging ]
|
||||
paths:
|
||||
- '**/*.cmake'
|
||||
- '**/CMakeLists.txt'
|
||||
- '**/*.cmake.in'
|
||||
workflow_dispatch: # Allows manual triggering
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
|
||||
jobs:
|
||||
check-cmake-format:
|
||||
name: Check CMake files formatting
|
||||
runs-on: self-hosted
|
||||
container: catthehacker/ubuntu:act-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Full history for better diff context
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install cmake-format
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install cmake-format==0.6.13
|
||||
|
||||
- name: Check CMake formatting
  id: check-format
  run: |
    echo "::group::Finding CMake files"
    # Collect NUL-delimited paths into an array so that file names containing
    # whitespace or glob characters are neither split nor expanded, and so
    # that an empty result is reported as 0 files instead of 1.
    mapfile -d '' -t cmake_files < <(
      find . -type f \( -name "CMakeLists.txt" -o -name "*.cmake" -o -name "*.cmake.in" \) \
        -not -path "*/esmi_ib_library/*" \
        -not -path "*/\.*" \
        -not -path "*/build/*" \
        -print0
    )
    echo "Found ${#cmake_files[@]} CMake files to check"
    echo "::endgroup::"

    # Create an array to store failed files
    declare -a failed_files=()

    # Check if files are formatted correctly
    for file in "${cmake_files[@]}"; do
      echo "Checking $file..."
      if ! cmake-format --check "$file"; then
        failed_files+=("$file")
        echo "::error file=$file::File needs formatting"
      fi
    done

    # Generate report and exit with error if any files failed
    if [ ${#failed_files[@]} -ne 0 ]; then
      echo "Failed files: ${failed_files[*]}"
      # Space-separated list; a later step reads it back from FAILED_FILES.
      echo "FAILED_FILES=${failed_files[*]}" >> "$GITHUB_ENV"
      exit 1
    else
      echo "All CMake files are formatted correctly!"
    fi
|
||||
|
||||
- name: Generate diff for failed files
|
||||
if: failure() && env.FAILED_FILES != ''
|
||||
run: |
|
||||
echo "## CMake Format Check Failed" >> $GITHUB_STEP_SUMMARY
|
||||
echo "The following files need formatting:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
for file in ${FAILED_FILES}; do
|
||||
echo "### $file" >> $GITHUB_STEP_SUMMARY
|
||||
done
|
||||
|
||||
cat << 'EOF' >> $GITHUB_STEP_SUMMARY
|
||||
### How to fix
|
||||
Run this command locally to fix formatting issues:
|
||||
```bash
|
||||
# Install cmake-format
|
||||
pip install cmake-format==0.6.13
|
||||
|
||||
# Format files
|
||||
cmake-format -i <file>
|
||||
```
|
||||
EOF
|
||||
+92
@@ -0,0 +1,92 @@
|
||||
name: "CodeQL Advanced"
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- amd-staging
|
||||
push:
|
||||
branches:
|
||||
- amd-staging
|
||||
schedule:
|
||||
- cron: '34 18 * * 5'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
# Runner size impacts CodeQL analysis time. To learn more, please see:
|
||||
# - https://gh.io/recommended-hardware-resources-for-running-codeql
|
||||
# - https://gh.io/supported-runners-and-hardware-resources
|
||||
# - https://gh.io/using-larger-runners (GitHub.com only)
|
||||
# Consider using larger runners or machines with greater resources for possible analysis time improvements.
|
||||
runs-on: ${{ 'ubuntu-latest' }}
|
||||
permissions:
|
||||
# required for all workflows
|
||||
security-events: write
|
||||
|
||||
# required to fetch internal or private CodeQL packs
|
||||
packages: read
|
||||
|
||||
# only required for workflows in private repositories
|
||||
actions: read
|
||||
contents: read
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- language: c-cpp
|
||||
build-mode: manual
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12.6'
|
||||
|
||||
- name: Install CMake
|
||||
run: python3 -m pip install cmake
|
||||
|
||||
- name: Install Virtualenv
|
||||
run: python3 -m pip install virtualenv
|
||||
|
||||
# Refresh the package index once before installing anything: a stale index on
# the runner image can make the apt-get install steps below fail.
- name: Update apt package index
  run: sudo apt-get update

- name: Install g++
  run: sudo apt-get install -y g++

- name: Install libdrm
  run: sudo apt-get install -y libdrm-dev

- name: Install DOxygen
  run: sudo apt-get install -y doxygen

- name: Install LaTeX
  run: sudo apt-get install -y texlive
|
||||
|
||||
- name: Clean old ROCm directories
|
||||
run: |
|
||||
sudo rm -rf /opt/rocm
|
||||
sudo rm -rf /opt/rocm-*
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: ${{ matrix.build-mode }}
|
||||
queries: security-extended
|
||||
|
||||
- name: Create build directory
|
||||
run: mkdir -p build
|
||||
|
||||
- name: Build AMD SMI Library
|
||||
run: |
|
||||
cd build
|
||||
cmake ..
|
||||
make -j $(nproc)
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
@@ -0,0 +1,83 @@
|
||||
name: Generate Documentation
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
|
||||
push:
|
||||
branches: [amd-staging, amd-mainline, release/rocm-rel-*]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
DEBCONF_NONINTERACTIVE_SEEN: true
|
||||
BUILD_TYPE: Release
|
||||
|
||||
jobs:
|
||||
generate-docs:
|
||||
name: Generate Documentation
|
||||
runs-on: AMD-ROCm-Internal-dev1
|
||||
steps:
|
||||
- name: Checkout Repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get branch name for artifact naming
  id: get_branch_info
  # Branch names are chosen by the pull request author. They are passed in
  # through the environment so that their contents are treated as data and
  # are never expanded into the text of the shell script itself.
  env:
    EVENT_NAME: ${{ github.event_name }}
    HEAD_REF: ${{ github.head_ref }}
    REF_NAME: ${{ github.ref_name }}
  run: |
    BRANCH_NAME=""
    if [[ "$EVENT_NAME" == "pull_request" ]]; then
      BRANCH_NAME="$HEAD_REF"
    else
      BRANCH_NAME="$REF_NAME"
    fi
    # Turn "/" into "-", drop anything outside [a-zA-Z0-9._-], and trim
    # leading/trailing dashes so the result is usable in an artifact name.
    SANITIZED_NAME=$(printf '%s\n' "$BRANCH_NAME" | sed -e 's|/|-|g' -e 's|[^a-zA-Z0-9._-]||g' -e 's|^-*||' -e 's|-*$||')
    if [[ -z "$SANITIZED_NAME" ]]; then
      # Nothing usable was left after sanitizing; fall back to a timestamp.
      SANITIZED_NAME="docs-$(date +%s)"
    fi
    echo "sanitized_name=${SANITIZED_NAME}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install System Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y doxygen graphviz
|
||||
|
||||
- name: Set Up Python Environment
|
||||
run: |
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install -r docs/sphinx/requirements.txt
|
||||
|
||||
- name: Build Documentation
|
||||
run: |
|
||||
if [ ! -e "docs/.git" ]; then
|
||||
if [ -d ".git" ]; then
|
||||
ln -s ../.git docs/.git
|
||||
fi
|
||||
fi
|
||||
cd docs
|
||||
python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html
|
||||
|
||||
- name: Upload Documentation
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: documentation-${{ steps.get_branch_info.outputs.sanitized_name }}
|
||||
path: docs/_build/html/
|
||||
|
||||
- name: Generate Job Summary
|
||||
run: |
|
||||
cat >> $GITHUB_STEP_SUMMARY << 'EOF'
|
||||
# 📚 Documentation Generated Successfully!
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
1. **📥 Download** the artifact `documentation-${{ steps.get_branch_info.outputs.sanitized_name }}`
|
||||
2. **📂 Extract** the ZIP file
|
||||
3. **🖱️ Double-click** `index.html`
|
||||
4. **✅ Done!** Documentation opens with full formatting in your browser
|
||||
EOF
|
||||
@@ -0,0 +1,83 @@
|
||||
name: GitHub to Gerrit Mirror
|
||||
|
||||
run-name: "Mirror to Gerrit: ${{ github.event.ref || inputs.branch }} ${{ github.event.after }}"
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
branch:
|
||||
description: 'Branch to mirror (amd-staging or amd-mainline)'
|
||||
required: true
|
||||
default: 'amd-staging'
|
||||
type: choice
|
||||
options:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
pull_request:
|
||||
branches:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
types: [closed]
|
||||
|
||||
env:
|
||||
GERRIT_SERVER: "gerrit-git.amd.com"
|
||||
GERRIT_PROJECT: "SYS-MGMT/ec/amd-smi"
|
||||
GERRIT_USER: "z1_runner"
|
||||
GERRIT_PORT: "29418"
|
||||
|
||||
jobs:
|
||||
Setup:
|
||||
if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
|
||||
runs-on: banff-sc-cx43-29
|
||||
steps:
|
||||
|
||||
- name: Fix workspace permissions
|
||||
run: |
|
||||
sudo chown -R $(id -u):$(id -g) ${{ github.workspace }}
|
||||
sudo chmod -R u+rwX ${{ github.workspace }}
|
||||
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Create SSH key
  # The private key reaches the shell through an environment variable, so its
  # contents are never spliced into the generated script text (where quotes,
  # "$" or backticks in the value would be interpreted by the shell).
  env:
    SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
  run: |
    mkdir -p ~/.ssh
    chmod 700 ~/.ssh
    touch ~/.ssh/known_hosts
    # Create the key file and restrict its permissions before writing to it.
    touch ~/.ssh/id_rsa
    chmod 600 ~/.ssh/id_rsa
    printf "%s" "$SSH_PRIVATE_KEY" > ~/.ssh/id_rsa
    ssh-keyscan -p ${{ env.GERRIT_PORT }} ${{ env.GERRIT_SERVER }} >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Debug SSH setup
|
||||
run: |
|
||||
ls -la ~/.ssh
|
||||
ssh -p ${{ env.GERRIT_PORT }} ${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }} || true
|
||||
|
||||
- name: Set Gerrit remote
|
||||
run: |
|
||||
cd ${{ github.workspace }}
|
||||
if git remote | grep -q "gerrit"
|
||||
then
|
||||
git remote set-url gerrit ssh://${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }}:${{ env.GERRIT_PORT }}/${{ env.GERRIT_PROJECT }}
|
||||
else
|
||||
git remote add gerrit ssh://${{ env.GERRIT_USER }}@${{ env.GERRIT_SERVER }}:${{ env.GERRIT_PORT }}/${{ env.GERRIT_PROJECT }}
|
||||
fi
|
||||
|
||||
- name: Set committer identity for Gerrit
|
||||
run: |
|
||||
git config user.name "z1_runner"
|
||||
git config user.email "z1_runner@amd.com"
|
||||
|
||||
- name: Fetch selected branch
|
||||
run: |
|
||||
BRANCH="${{ github.event.pull_request.base.ref || inputs.branch }}"
|
||||
git fetch origin ${BRANCH}:refs/remotes/origin/${BRANCH}
|
||||
git checkout ${BRANCH}
|
||||
|
||||
- name: Mirror selected branch to Gerrit
|
||||
run: |
|
||||
BRANCH="${{ github.event.pull_request.base.ref || inputs.branch }}"
|
||||
git push gerrit refs/heads/${BRANCH}:refs/heads/${BRANCH}
|
||||
@@ -0,0 +1,15 @@
|
||||
name: Rocm Validation Suite KWS
|
||||
on:
|
||||
push:
|
||||
branches: [amd-staging, amd-mainline]
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
kws:
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{github.event.pull_request.number}}
|
||||
base_branch: ${{github.base_ref}}
|
||||
@@ -0,0 +1,25 @@
|
||||
name: ROCm CI Caller
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, release/rocm-rel-*, amd-mainline]
|
||||
types: [opened, reopened, synchronize]
|
||||
push:
|
||||
branches: [amd-mainline]
|
||||
workflow_dispatch:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
call-workflow:
  # Always run for pull_request, push and workflow_dispatch. For issue_comment,
  # run only when the comment is on a pull request and begins with "!verify";
  # that prefix also matches "!verify release" and "!verify retest".
  if: github.event_name != 'issue_comment' || (github.event.issue.pull_request && startsWith(github.event.comment.body, '!verify'))
  uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
  secrets: inherit
  with:
    # NOTE(review): on issue_comment events `github.event.issue.pull_request`
    # appears to carry only link fields (url, html_url, ...), so the head.sha
    # and base.ref lookups below presumably evaluate to empty and fall through
    # to github.sha / github.ref. Confirm that the called workflow resolves
    # the pull request itself for comment triggers.
    input_sha: ${{github.event_name == 'pull_request' && github.event.pull_request.head.sha || (github.event_name == 'push' && github.sha) || (github.event_name == 'issue_comment' && github.event.issue.pull_request.head.sha) || github.sha}}
    input_pr_num: ${{github.event_name == 'pull_request' && github.event.pull_request.number || (github.event_name == 'issue_comment' && github.event.issue.number) || 0}}
    input_pr_url: ${{github.event_name == 'pull_request' && github.event.pull_request.html_url || (github.event_name == 'issue_comment' && github.event.issue.pull_request.html_url) || ''}}
    # For comment triggers the pull request title is found on the issue object.
    input_pr_title: ${{github.event_name == 'pull_request' && github.event.pull_request.title || (github.event_name == 'issue_comment' && github.event.issue.title) || ''}}
    repository_name: ${{ github.repository }}
    base_ref: ${{github.event_name == 'pull_request' && github.event.pull_request.base.ref || (github.event_name == 'issue_comment' && github.event.issue.pull_request.base.ref) || github.ref}}
    trigger_event_type: ${{ github.event_name }}
    comment_text: ${{ github.event_name == 'issue_comment' && github.event.comment.body || '' }}
|
||||
@@ -0,0 +1,44 @@
|
||||
# NOTE! Please use 'git ls-files -i --exclude-standard'
|
||||
# command after changing this file, to see if there are
|
||||
# any tracked files which get ignored after the change.
|
||||
|
||||
# VisualStudioCode
|
||||
.vscode/
|
||||
|
||||
# build directories generated by cmake
|
||||
build/
|
||||
cmake/build/
|
||||
.cache/
|
||||
|
||||
# build artifacts
|
||||
oam/include/oam/oamConfig.h
|
||||
python_smi_tools/rsmiBindings.py
|
||||
include/amd_smi/amd_smi64Config.h
|
||||
rocm_smi/include/rocm_smi/rocm_smi64Config.h
|
||||
docs/*.pdf
|
||||
goamdsmi_shim/include/goamdsmi_shimConfig.h
|
||||
goamdsmi_shim/include/goamdsmi_shim64Config.h
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*.egg-*
|
||||
|
||||
# documentation artifacts
|
||||
_toc.yml
|
||||
_build/
|
||||
_doxygen/
|
||||
docBin/
|
||||
|
||||
# Simulated SYSFS - for early development or debug
|
||||
device/
|
||||
|
||||
# misc
|
||||
esmi_ib_library/
|
||||
|
||||
# do NOT ignore these files
|
||||
!.clang-format
|
||||
!.clang-tidy
|
||||
!.clangd
|
||||
!.cmake-format
|
||||
!.pre-commit-config.yaml
|
||||
@@ -0,0 +1,34 @@
|
||||
# - How to use:
|
||||
# python3 -m pip install pre-commit
|
||||
# pre-commit install --install-hooks
|
||||
# Upon a new commit - the hooks should automagically run
|
||||
#
|
||||
# - How to skip:
|
||||
# git commit --no-verify
|
||||
# or
|
||||
# SKIP=clang-format-docker git commit
|
||||
# SKIP=cpplint-docker git commit
|
||||
|
||||
fail_fast: false
|
||||
repos:
|
||||
# For portability I decided to use Docker containers
|
||||
- repo: https://github.com/dmitrii-galantsev/pre-commit-docker-cpplint
|
||||
rev: 0.0.3
|
||||
hooks:
|
||||
- id: clang-format-docker
|
||||
- id: cpplint-docker
|
||||
- repo: https://github.com/cheshirekow/cmake-format-precommit
|
||||
rev: v0.6.13
|
||||
hooks:
|
||||
- id: cmake-format
|
||||
# Below is a local way of running formatters and linters
|
||||
# NOTE: clang-tidy is not used in the above tests
|
||||
# - repo: https://github.com/pocc/pre-commit-hooks
|
||||
# rev: v1.3.5
|
||||
# hooks:
|
||||
# - id: clang-format
|
||||
# args: [--no-diff, -i]
|
||||
# - id: clang-tidy
|
||||
# args: [-p=build, --quiet]
|
||||
# - id: cpplint
|
||||
# args: [--verbose=5]
|
||||
@@ -0,0 +1,18 @@
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
version: 2
|
||||
|
||||
build:
|
||||
os: ubuntu-24.04
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
formats: [htmlzip, pdf]
|
||||
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/sphinx/requirements.txt
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,511 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
set(AMD_SMI "amd_smi")
|
||||
set(AMD_SMI_LIBS_TARGET "${AMD_SMI}_lib")
|
||||
set(CPACK_PACKAGE_NAME amd-smi-lib CACHE STRING "")
|
||||
|
||||
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or not.")
|
||||
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/" CACHE INTERNAL "Default module path.")
|
||||
## Include common cmake modules
|
||||
include(utils)
|
||||
include(help_package)
|
||||
find_package(PkgConfig)
|
||||
|
||||
generic_add_rocm()
|
||||
|
||||
# provide git to utilities
|
||||
find_program(GIT NAMES git)
|
||||
|
||||
## Setup the package version based on git tags.
|
||||
set(PKG_VERSION_GIT_TAG_PREFIX "amdsmi_pkg_ver")
|
||||
get_version_from_file("include/amd_smi/amdsmi.h" "MAJOR")
|
||||
get_version_from_file("include/amd_smi/amdsmi.h" "MINOR")
|
||||
get_version_from_file("include/amd_smi/amdsmi.h" "RELEASE")
|
||||
set(DEFAULT_VERSION "${MAJOR}.${MINOR}.${RELEASE}")
|
||||
get_package_version_number(${DEFAULT_VERSION} ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_PATCH "${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_BUILD "0")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_HASH "${PKG_VERSION_HASH}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_STRING
|
||||
"${${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR}.${${AMD_SMI_LIBS_TARGET}_VERSION_MINOR}.${${AMD_SMI_LIBS_TARGET}_VERSION_PATCH}+${${AMD_SMI_LIBS_TARGET}_VERSION_HASH}"
|
||||
)
|
||||
|
||||
set(DEFAULT_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
update_version_in_file("include/amd_smi/amdsmi.h" ${DEFAULT_VERSION} "#define AMDSMI_LIB_VERSION_" " *" " ")
|
||||
update_version_in_file("rust-interface/src/amdsmi_wrapper.rs" ${DEFAULT_VERSION} "AMDSMI_LIB_VERSION_" " *: *u32 *= *"
|
||||
": u32 = ")
|
||||
|
||||
# Make proper version for appending
|
||||
# Default Value is 99999
|
||||
set(ROCM_VERSION_FOR_PACKAGE "99999")
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION})
|
||||
endif()
|
||||
#Prepare final version for the CPACK use
|
||||
set(CPACK_PACKAGE_VERSION
|
||||
"${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}"
|
||||
)
|
||||
|
||||
# The following default version values should be updated as appropriate for
|
||||
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
|
||||
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden
|
||||
# by git tags (through "git describe") when they are present.
|
||||
set(PKG_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
|
||||
set(PKG_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
|
||||
set(PKG_VERSION_PATCH "${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
set(PKG_VERSION_NUM_COMMIT 0)
|
||||
|
||||
project(${AMD_SMI_LIBS_TARGET} DESCRIPTION "AMD System Management libraries"
|
||||
HOMEPAGE_URL "https://github.com/ROCm/amdsmi")
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
|
||||
# Link with stdc++fs for filesystem support (only for GCC < 9.0)
|
||||
set(FILESYSTEM_LIB "")
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0)
|
||||
set(FILESYSTEM_LIB stdc++fs)
|
||||
message(STATUS "GCC ${CMAKE_CXX_COMPILER_VERSION} detected, linking with stdc++fs for filesystem support")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
option(BUILD_TESTS "Build test suite" OFF)
|
||||
option(ENABLE_ASAN_PACKAGING "" OFF)
|
||||
option(ENABLE_ESMI_LIB "Build ESMI Library" ON)
|
||||
option(BUILD_EXAMPLES "Build examples" OFF)
|
||||
|
||||
# If amdsmi is built as a static library, it should support being embedded in other programs. The setting below essentially enables the -fPIC flag.
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL "Enable position independent code for all targets")
|
||||
mark_as_advanced(CMAKE_POSITION_INDEPENDENT_CODE)
|
||||
|
||||
include(CMakeDependentOption)
|
||||
# these options don't work without BUILD_SHARED_LIBS
|
||||
cmake_dependent_option(BUILD_WRAPPER "Rebuild AMDSMI-wrapper" OFF "BUILD_SHARED_LIBS" OFF)
|
||||
cmake_dependent_option(BUILD_CLI "Build AMDSMI-CLI and install" ON "BUILD_SHARED_LIBS" OFF)
|
||||
cmake_dependent_option(BUILD_RUST_WRAPPER "Build rust wrapper and install" OFF "BUILD_SHARED_LIBS" OFF)
|
||||
cmake_dependent_option(ENABLE_LDCONFIG "Set library links and caches using ldconfig." ON "BUILD_SHARED_LIBS" OFF)
|
||||
|
||||
# Set share path here because project name != amd_smi
|
||||
set(SHARE_INSTALL_PREFIX "${CMAKE_INSTALL_DATAROOTDIR}/${AMD_SMI}" CACHE STRING "Tests and Example install directory")
|
||||
|
||||
# Packaging directives
|
||||
set(CPACK_PACKAGE_CONTACT "AMD-SMILib Support <amd-smi.support@amd.com>" CACHE STRING "")
|
||||
|
||||
generic_package()
|
||||
|
||||
# Dependencies
|
||||
find_package(Threads REQUIRED)
|
||||
pkg_check_modules(DRM REQUIRED IMPORTED_TARGET libdrm)
|
||||
pkg_check_modules(DRM_AMDGPU REQUIRED IMPORTED_TARGET libdrm_amdgpu)
|
||||
|
||||
# Configuration
|
||||
# Look up the SONAME of the shared library behind an imported target.
#
# Arguments:
#   target  - imported target; its INTERFACE_LINK_LIBRARIES must name exactly
#             one library file that objdump can read
#   out_var - variable set in the caller's scope to the SONAME that was found
#
# Configuration stops with FATAL_ERROR when the target lists no library or
# more than one, when objdump fails, or when no SONAME entry is present.
function(get_imported_soname target out_var)
    get_target_property(link_libs ${target} INTERFACE_LINK_LIBRARIES)
    # An unset property comes back as "<var>-NOTFOUND"; reject that (and an
    # empty list) here instead of handing a bogus path to objdump.
    if(NOT link_libs)
        message(FATAL_ERROR "Target ${target} has no INTERFACE_LINK_LIBRARIES to read a SONAME from")
    endif()
    list(LENGTH link_libs link_lib_count)
    if(link_lib_count GREATER 1)
        message(FATAL_ERROR "Target ${target} has multiple link libraries: ${link_libs}")
    endif()

    list(GET link_libs 0 link_lib)
    execute_process(
        COMMAND objdump -p "${link_lib}"
        OUTPUT_VARIABLE objdump_output
        ERROR_VARIABLE objdump_error
        RESULT_VARIABLE objdump_result)
    if(NOT objdump_result EQUAL 0)
        message(FATAL_ERROR "objdump failed for ${link_lib}: ${objdump_error}")
    endif()

    # Capture the token that follows "SONAME" in the objdump output.
    string(REGEX MATCH "SONAME +([^ \n]+)" soname_match "${objdump_output}")
    if(NOT soname_match)
        message(FATAL_ERROR "Could not find SONAME in objdump output for ${link_lib}")
    endif()
    set(soname "${CMAKE_MATCH_1}")
    message(STATUS "SONAME of ${target}: ${soname}")

    set("${out_var}" "${soname}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
get_imported_soname(PkgConfig::DRM_AMDGPU LIBDRM_AMDGPU_SONAME)
|
||||
configure_file(
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include/config/amd_smi_config.h.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/include/config/amd_smi_config.h"
|
||||
@ONLY
|
||||
)
|
||||
|
||||
## Compiler flags
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti")
|
||||
# Add 64-bit/SSE code generation flags on x86_64 hosts.
# The variable is named rather than expanded: if() dereferences it itself, so
# an empty value no longer produces a malformed condition and the value is not
# evaluated a second time as a variable name.
if(CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
endif()
|
||||
# Security options
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wcast-align")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat=2 -fno-common -Wstrict-overflow")
|
||||
# Intentionally leave out -Wsign-promo. It causes spurious warnings.
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Woverloaded-virtual -Wreorder")
|
||||
|
||||
set(ROCM_SRC_DIR "${PROJECT_SOURCE_DIR}/rocm_smi/src")
|
||||
set(ROCM_INC_DIR "${PROJECT_SOURCE_DIR}/rocm_smi/include/rocm_smi")
|
||||
set(SHR_MUTEX_DIR "${PROJECT_SOURCE_DIR}/third_party/shared_mutex")
|
||||
# Optional E-SMI (CPU) support: fetch esmi_ib_library at the supported tag into
# the source tree, refresh it when the checked-out tag differs, then expose its
# include/source directories to the rest of the build.
if(ENABLE_ESMI_LIB)
    # Supported esmi library version tag
    set(current_esmi_tag "esmi_pkg_ver-4.2")

    if(NOT EXISTS ${PROJECT_SOURCE_DIR}/esmi_ib_library/src)
        # TODO: use ExternalProject_Add instead or a submodule
        message(STATUS "Adding esmi_ib_library...")
        execute_process(
            COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
                    ${PROJECT_SOURCE_DIR}/esmi_ib_library
            RESULT_VARIABLE clone_result)
        # Stop on a failed clone. Otherwise the file(COPY) below would create a
        # partial esmi_ib_library/ tree, and every later clone into that
        # non-empty directory would fail as well.
        if(NOT clone_result EQUAL 0)
            message(FATAL_ERROR
                "Failed to clone esmi_ib_library (tag ${current_esmi_tag}): ${clone_result}")
        endif()
    else()
        message(STATUS "esmi_ib_library already installed, checking version...")

        # Grab latest commit and get the tag
        execute_process(
            COMMAND git rev-list --tags --max-count=1
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
            OUTPUT_VARIABLE latest_commit
            OUTPUT_STRIP_TRAILING_WHITESPACE)
        execute_process(
            COMMAND git describe --tags ${latest_commit} --match "*pkg*"
            WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/esmi_ib_library
            OUTPUT_VARIABLE latest_esmi_tag
            OUTPUT_STRIP_TRAILING_WHITESPACE)

        # Update to latest tags if not matched
        if(NOT latest_esmi_tag STREQUAL current_esmi_tag)
            message(STATUS "Updating esmi_ib_library...")
            # Clone next to the existing tree so a failed download leaves the
            # current checkout untouched.
            execute_process(
                COMMAND git clone --depth=1 -b ${current_esmi_tag} https://github.com/amd/esmi_ib_library.git
                        ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp
                RESULT_VARIABLE clone_result)
            if(clone_result EQUAL 0)
                file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library)
                file(RENAME ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp ${PROJECT_SOURCE_DIR}/esmi_ib_library)
                message(STATUS "Successfully cloned updated esmi_ib_library")
            else()
                file(REMOVE_RECURSE ${PROJECT_SOURCE_DIR}/esmi_ib_library_temp)
                message(FATAL_ERROR "Failed to clone updated esmi_ib_library")
            endif()
        else()
            message(STATUS "esmi_ib_library is the latest version: ${current_esmi_tag}...")
        endif()
    endif()

    # Make sure to update the amd_hsmp.h file with the corresponding esmi version
    file(COPY "${PROJECT_SOURCE_DIR}/include/amd_smi/impl/amd_hsmp.h"
         DESTINATION "${PROJECT_SOURCE_DIR}/esmi_ib_library/include/asm")

    add_definitions("-DENABLE_ESMI_LIB=1")
    set(ESMI_INC_DIR "${PROJECT_SOURCE_DIR}/esmi_ib_library/include")
    set(ESMI_SRC_DIR "${PROJECT_SOURCE_DIR}/esmi_ib_library/src")
    # esmi has a lot of write-strings warnings - silence them
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings")
endif()
|
||||
|
||||
include_directories(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_BINARY_DIR}/include
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/third_party/shared_mutex
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/amd_smi
|
||||
${ESMI_INC_DIR}
|
||||
)
|
||||
|
||||
set(CMN_SRC_LIST
|
||||
"${ROCM_SRC_DIR}/rocm_smi_device.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_main.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_monitor.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_power_mon.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_utils.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_counters.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_kfd.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_io_link.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_gpu_metrics.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_dyn_gpu_metrics.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_logger.cc"
|
||||
"${SHR_MUTEX_DIR}/shared_mutex.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_binary_parser.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_board_temp.cc"
|
||||
"${ROCM_SRC_DIR}/rocm_smi_npm.cc")
|
||||
|
||||
if(ENABLE_ESMI_LIB)
|
||||
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi.c)
|
||||
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_monitor.c)
|
||||
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_plat.c)
|
||||
list(APPEND CMN_SRC_LIST ${ESMI_SRC_DIR}/e_smi_utils.c)
|
||||
endif()
|
||||
|
||||
set(CMN_INC_LIST
|
||||
"${ROCM_INC_DIR}/rocm_smi_device.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_main.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_monitor.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_power_mon.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_utils.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_common.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_exception.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_counters.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_kfd.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_io_link.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_gpu_metrics.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_dyn_gpu_metrics.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_logger.h"
|
||||
"${SHR_MUTEX_DIR}/shared_mutex.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_binary_parser.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_board_temp.h"
|
||||
"${ROCM_INC_DIR}/rocm_smi_npm.h")
|
||||
|
||||
add_subdirectory("rocm_smi")
|
||||
add_subdirectory("src")
|
||||
|
||||
if(BUILD_TESTS)
|
||||
set(TESTS_COMPONENT "tests")
|
||||
#add_subdirectory("tests/rocm_smi_test")
|
||||
add_subdirectory("tests/amd_smi_test")
|
||||
add_subdirectory("tests/python_unittest")
|
||||
endif()
|
||||
|
||||
# python interface, CLI, and py-test depend on shared libraries
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_subdirectory("py-interface")
|
||||
if(BUILD_CLI)
|
||||
add_subdirectory("amdsmi_cli")
|
||||
endif()
|
||||
if(BUILD_RUST_WRAPPER)
|
||||
add_subdirectory("rust-interface")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_EXAMPLES)
|
||||
add_subdirectory("example")
|
||||
endif()
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
configure_package_config_file(
|
||||
amd_smi-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
|
||||
PATH_VARS CMAKE_INSTALL_LIBDIR CMAKE_INSTALL_INCLUDEDIR CMAKE_INSTALL_BINDIR)
|
||||
|
||||
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config-version.cmake
|
||||
VERSION "${CPACK_PACKAGE_VERSION}" COMPATIBILITY SameMajorVersion)
|
||||
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/amd_smi-config-version.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
|
||||
COMPONENT dev)
|
||||
|
||||
# Create cmake target
|
||||
# Add all targets to the build-tree export set
|
||||
export(TARGETS ${AMD_SMI} FILE "${PROJECT_BINARY_DIR}/amd_smi_target.cmake")
|
||||
|
||||
# Export the package for use from the build-tree
|
||||
# (this registers the build-tree with a global CMake-registry)
|
||||
export(PACKAGE ${AMD_SMI})
|
||||
|
||||
install(
|
||||
EXPORT amd_smiTargets
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${AMD_SMI}
|
||||
COMPONENT dev)
|
||||
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
||||
if(ENABLE_ASAN_PACKAGING)
|
||||
# install license file in share/doc/amd_smi-asan folder
|
||||
install(
|
||||
FILES ${CPACK_RESOURCE_FILE_LICENSE}
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}-asan
|
||||
RENAME LICENSE.txt
|
||||
COMPONENT asan)
|
||||
endif()
|
||||
# docs are installed into different share directory from tests and examples
|
||||
install(
|
||||
FILES ${CPACK_RESOURCE_FILE_LICENSE}
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
|
||||
RENAME LICENSE.txt
|
||||
COMPONENT dev)
|
||||
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.md
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
|
||||
COMPONENT dev)
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/example
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${AMD_SMI}
|
||||
COMPONENT dev
|
||||
FILES_MATCHING
|
||||
PATTERN "*.h"
|
||||
PATTERN "*.cc"
|
||||
PATTERN "*.txt"
|
||||
PATTERN "build*" EXCLUDE
|
||||
PATTERN ".cache*" EXCLUDE)
|
||||
|
||||
# Make for goamdsmi_shim library
|
||||
add_subdirectory(goamdsmi_shim)
|
||||
|
||||
#Debian package specific variables
|
||||
set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "python3-argcomplete, libdrm-dev, libdrm-amdgpu-dev")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_RECOMMENDS ${CPACK_DEBIAN_PACKAGE_RECOMMENDS})
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_RECOMMENDS ${CPACK_DEBIAN_PACKAGE_RECOMMENDS})
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "sudo, libc6, python3 (>= 3.6.8), python3-pip, python3-setuptools, python3-wheel")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS})
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS})
|
||||
|
||||
# $CURRENT_YEAR is used by copyright.in
|
||||
string(TIMESTAMP CURRENT_YEAR "%Y")
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/copyright.in DEBIAN/copyright @ONLY)
|
||||
|
||||
## Process the Debian install/remove scripts to update the CPACK variables
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in
|
||||
DEBIAN/postinst
|
||||
@ONLY
|
||||
FILE_PERMISSIONS
|
||||
OWNER_READ
|
||||
OWNER_WRITE
|
||||
OWNER_EXECUTE
|
||||
GROUP_READ
|
||||
GROUP_EXECUTE
|
||||
WORLD_READ
|
||||
WORLD_EXECUTE)
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in
|
||||
DEBIAN/prerm
|
||||
@ONLY
|
||||
FILE_PERMISSIONS
|
||||
OWNER_READ
|
||||
OWNER_WRITE
|
||||
OWNER_EXECUTE
|
||||
GROUP_READ
|
||||
GROUP_EXECUTE
|
||||
WORLD_READ
|
||||
WORLD_EXECUTE)
|
||||
list(APPEND CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/postinst"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/prerm")
|
||||
|
||||
# Configure pre-rm for tests only
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm.in
|
||||
DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm
|
||||
@ONLY
|
||||
FILE_PERMISSIONS
|
||||
OWNER_READ
|
||||
OWNER_WRITE
|
||||
OWNER_EXECUTE
|
||||
GROUP_READ
|
||||
GROUP_EXECUTE
|
||||
WORLD_READ
|
||||
WORLD_EXECUTE)
|
||||
|
||||
# Assign control scripts to the AMDSMI Lib & Tests packages
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/prerm")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/${CPACK_PACKAGE_NAME}-tests/prerm")
|
||||
|
||||
# install copyright file into share/doc/amd-smi-lib/copyright
|
||||
# required for debian package compliance
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/DEBIAN/copyright"
|
||||
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${CPACK_PACKAGE_NAME}
|
||||
COMPONENT dev)
|
||||
|
||||
# RPM package specific variables
|
||||
# Directories the RPM must not claim ownership of. This is a CMake list, so
# each path is its own element; a single space-joined string would be treated
# as one path that matches nothing.
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
    "${CPACK_PACKAGING_INSTALL_PREFIX}"
    "${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}")
|
||||
#Set rpm distro
|
||||
if(CPACK_RPM_PACKAGE_RELEASE)
|
||||
set(CPACK_RPM_PACKAGE_RELEASE_DIST ON)
|
||||
endif()
|
||||
# NOTE: RPM SUGGESTS DO NOT WORK! https://bugzilla.redhat.com/show_bug.cgi?id=1811358
|
||||
set(CPACK_RPM_PACKAGE_SUGGESTS "python3-argcomplete, libdrm-dev, libdrm-amdgpu-dev")
|
||||
set(CPACK_RPM_DEV_PACKAGE_SUGGESTS ${CPACK_RPM_PACKAGE_SUGGESTS})
|
||||
set(CPACK_RPM_ASAN_PACKAGE_SUGGESTS ${CPACK_RPM_PACKAGE_SUGGESTS})
|
||||
# python version gated by rhel8 :(
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "sudo, python3 >= 3.6.8, python3-pip, python3-wheel, python3-setuptools")
|
||||
set(CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES})
|
||||
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES})
|
||||
|
||||
# don't terminate if bytecompile of python files fails
|
||||
set(CPACK_RPM_SPEC_MORE_DEFINE "%define _python_bytecompile_errors_terminate_build 0")
|
||||
# CPack converts #!/usr/bin/env python3 to /usr/libexec/platform-python in RHEL8.
|
||||
# prevent the BRP(buildroot policy) script from checking and modifying interpreter directives
|
||||
string(APPEND CPACK_RPM_SPEC_MORE_DEFINE "\n%undefine __brp_mangle_shebangs")
|
||||
|
||||
# Add rocm-core dependency if -DROCM_DEP_ROCMCORE=ON is passed
|
||||
if(ROCM_DEP_ROCMCORE)
|
||||
string(APPEND CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ", rocm-core-asan")
|
||||
string(APPEND CPACK_RPM_ASAN_PACKAGE_REQUIRES ", rocm-core-asan")
|
||||
string(APPEND CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ", rocm-core")
|
||||
string(APPEND CPACK_RPM_DEV_PACKAGE_REQUIRES ", rocm-core")
|
||||
string(APPEND CPACK_DEBIAN_PACKAGE_DEPENDS ", rocm-core")
|
||||
string(APPEND CPACK_RPM_PACKAGE_REQUIRES ", rocm-core")
|
||||
endif()
|
||||
|
||||
## Enable Component Mode and set component specific flags
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${CPACK_PACKAGE_NAME}")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-tests")
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-asan")
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_DEV_PACKAGE_NAME "${CPACK_PACKAGE_NAME}")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-tests")
|
||||
set(CPACK_RPM_ASAN_PACKAGE_NAME "${CPACK_PACKAGE_NAME}-asan")
|
||||
if(ENABLE_ASAN_PACKAGING)
|
||||
# ASAN Package requires only asan component with libraries and license file
|
||||
set(CPACK_COMPONENTS_ALL asan)
|
||||
else()
|
||||
set(CPACK_COMPONENTS_ALL dev tests)
|
||||
endif()
|
||||
|
||||
# The line below doesn't currently work; it may be this issue:
|
||||
# https://bugzilla.redhat.com/show_bug.cgi?id=1811358
|
||||
# set(CPACK_RPM_PACKAGE_SUGGESTS "sudo, libdrm-dev")
|
||||
|
||||
## Process the Rpm install/remove scripts to update the CPACK variables
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY)
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/preun.in" RPM/preun @ONLY)
|
||||
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY)
|
||||
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post")
|
||||
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/preun")
|
||||
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun")
|
||||
|
||||
#Set the names now using CPACK utility
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
|
||||
include(CPack)
|
||||
|
||||
generic_package_post()
|
||||
@@ -0,0 +1,3 @@
|
||||
set noparent
|
||||
linelength=100
|
||||
filter=-build/include_subdir,-legal/copyright,-runtime/printf,-build/c++11,-runtime/int,-build/header_guard
|
||||
+127
@@ -0,0 +1,127 @@
|
||||
#!/bin/bash
|
||||
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Other prerm actions
|
||||
# Undo the dynamic-linker configuration written at install time.
rm_ldconfig() {
    # @ENABLE_LDCONFIG@ is substituted with ON/OFF when the package is built;
    # nothing to undo unless it was ON.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
|
||||
|
||||
|
||||
# Remove files created after installation that the package manager does not
# track: python bytecode caches, python build artefacts and stale doc folders.
rm_leftovers() {
    local install_prefix="@CPACK_PACKAGING_INSTALL_PREFIX@"
    local share_dir="${install_prefix}/@SHARE_INSTALL_PREFIX@"

    # remove pyc files generated by python
    rm -rf "${install_prefix}/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
    rm -rf "${share_dir}/amdsmi/__pycache__"

    # remove build and egg files
    rm -rf "${share_dir}/amdsmi.egg-info"
    rm -rf "${share_dir}/build"

    # remove leftover doc files
    # The glob must sit outside the quotes to be expanded; with it quoted the
    # pattern was taken literally and never matched. `rm -rf` is silent when
    # nothing matches, so no existence test is needed.
    rm -rf "${share_dir}/../doc/"amd_smi*
}
|
||||
|
||||
# Delete the AMD SMI library log directory and everything in it.
rm_logFolder() {
    local log_dir=/var/log/amd_smi_lib
    rm -rf "${log_dir}"
}
|
||||
|
||||
|
||||
# Remove the installed tests directory, reporting only when something was deleted.
rm_rocm_tests_dir(){
    local tests_dir="@CPACK_PACKAGING_INSTALL_PREFIX@/share/amd_smi/tests/"

    # Nothing to do (and nothing to report) when the directory is absent.
    [ -d "${tests_dir}" ] || return 0

    rm -rf "${tests_dir}"
    echo "Removed ROCm tests directory."
}
|
||||
|
||||
|
||||
# Revert the logrotate changes: drop the AMD SMI logrotate config, move the
# cron job from hourly back to daily, and restore the backed-up systemd timer.
return_logrotateToOrigConfig() {
    local logrotateConfFile=/etc/logrotate.d/amd_smi.conf
    local timer_unit=/lib/systemd/system/logrotate.timer
    local timer_backup="${timer_unit}.backup"

    # AMD SMI specific rotation rules
    [ ! -f "$logrotateConfFile" ] || rm -rf "$logrotateConfFile"

    # cron based setups: put the logrotate job back under cron.daily
    [ ! -f /etc/cron.hourly/logrotate ] || mv /etc/cron.hourly/logrotate /etc/cron.daily/logrotate

    # systemd based setups: only act when a backup of the timer unit exists
    [ -f "$timer_backup" ] || return 0
    cp "$timer_backup" "$timer_unit"
    rm -rf "$timer_backup"
    systemctl reenable --now logrotate.timer
}
|
||||
|
||||
# Uninstall the `amdsmi` python package through pip and report whether it is
# still listed afterwards. Does nothing (beyond a warning) when python3 cannot
# be queried or reports a 3.x minor version below 6.
rm_python_lib() {
    local python3_minor_version
    if ! python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
             "AMD-SMI python library will not be uninstalled."
        return
    fi

    # python versions older than 3.6 are not supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$python3_minor_version. "\
             "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Uninstall only when pip lists something containing "amdsmi"
    local installed_pkgs
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # Query pip again to confirm the outcome
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                 "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
|
||||
|
||||
|
||||
# Dispatch on the maintainer-script action passed as $1.
case "$1" in
    remove|upgrade)
        # remove old gpuv-smi symlink
        rm -f @CPACK_PACKAGING_INSTALL_PREFIX@/bin/gpuv-smi &> /dev/null
        echo "Removing AMDSMI Lib Tests Packages..."
        rm_ldconfig
        echo "ldconfig removed"
        rm_leftovers
        echo "leftovers removed"
        ;;
    purge)
        # no additional work on purge
        ;;
    *)
        exit 0
        ;;
esac
|
||||
@@ -0,0 +1,26 @@
|
||||
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: amdsmi
|
||||
Source: https://github.com/ROCm/amdsmi.git
|
||||
|
||||
Files: *
|
||||
Copyright: @CURRENT_YEAR@ Advanced Micro Devices, Inc.
|
||||
License: MIT
|
||||
|
||||
License: MIT
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
Executable
+210
@@ -0,0 +1,210 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Prepare the AMD SMI log file and arrange for hourly log rotation.
#
# Creates /var/log/amd_smi_lib/AMD-SMI-lib.log (world read/write), writes
# /etc/logrotate.d/amd_smi.conf if it does not exist yet, and then switches
# logrotate to an hourly schedule, either by rewriting the systemd
# logrotate.timer unit or by moving the cron job from cron.daily to cron.hourly.
do_configureLogrotate() {
    local packageName="amd-smi-lib"
    local logPath=/var/log/amd_smi_lib
    local logFile="${logPath}/AMD-SMI-lib.log"
    local logrotateConfFile=/etc/logrotate.d/amd_smi.conf
    local timer_unit=/lib/systemd/system/logrotate.timer
    local uses_systemd_timer=0

    mkdir -p "${logPath}"
    touch "${logFile}"
    chmod -R a+rw "${logPath}"
    chmod a+rw "${logFile}"

    if ! command -v logrotate &>/dev/null; then
        echo "[WARNING] Detected logrotate is not installed."\
             "$packageName logs (when turned on) will not rotate properly."
        return
    fi

    if [ ! -f "$logrotateConfFile" ]; then
        touch "${logrotateConfFile}"
        chmod 644 "${logrotateConfFile}" # root r/w, all others read
        # Rotation policy written below (rotation runs as root:root):
        #   - checked hourly, rotated only once the log exceeds 1MB
        #   - at most 4 rotated files are kept, the oldest is dropped
        #   - rotated files get a date extension, e.g.
        #     AMD-SMI-lib.log.2023-05-09_16:51:42
        cat << EOF > "${logrotateConfFile}"
${logFile} {
su root root
hourly
missingok
notifempty
rotate 4
size 1M
copytruncate
dateext
dateformat .%%Y-%%m-%%d_%H:%%M:%%S
}
EOF
        # The date format is written with doubled '%' because the RPM build
        # treated single '%' sequences as macros; collapse them again here.
        # https://gitlab.kitware.com/cmake/cmake/-/issues/22965
        # https://rpm-software-management.github.io/rpm/manual/spec.html
        sed -i s/%%/%/g "${logrotateConfFile}"
        # workaround: strip stray 'OURCE' text from amd_smi.conf. Origin is
        # unclear (CMake, bash or the here document); only seen on RHEL 8.7.
        sed -i s/OURCE//g "${logrotateConfFile}"
    fi

    # Ubuntu and other modern systems drive logrotate from a systemd timer.
    # Several older ones, such as RHEL 8.7, use a daily cron job instead -
    # see https://stackoverflow.com/a/69465677
    if [ -d /run/systemd/system ] && systemctl list-timers | grep -iq logrotate; then
        uses_systemd_timer=1
    fi

    if [ "$uses_systemd_timer" -ne 1 ]; then
        # cron setup: move logrotate from daily to hourly when both exist
        if [ -f /etc/cron.daily/logrotate ] && [ -d /etc/cron.hourly ]; then
            mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
        fi
        return
    fi

    # systemd timer setup - the typical arrangement on modern Linux
    if [ ! -f "$timer_unit" ]; then
        echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
             "$packageName logs (when turned on) will not rotate properly."
        return
    fi

    # Keep a one-time copy of the distribution's timer unit
    if [ ! -f "${timer_unit}.backup" ]; then
        cp "$timer_unit" "${timer_unit}.backup"
    fi
    cat << EOF > "$timer_unit"
[Unit]
Description=Hourly rotation of log files
Documentation=man:logrotate(8) man:logrotate.conf(5)

[Timer]
OnCalendar=
OnCalendar=hourly
AccuracySec=1m
Persistent=true

[Install]
WantedBy=timers.target
EOF
    systemctl reenable --now logrotate.timer
}
|
||||
|
||||
# Register the library install directory with the dynamic linker.
do_ldconfig() {
    # @ENABLE_LDCONFIG@ is substituted with ON/OFF when the package is built;
    # skip the registration unless it was ON.
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    echo @CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ > /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
|
||||
|
||||
# Install the AMD-SMI python library (amdsmi) for the system python3.
#
# Steps:
#   1. give up with a warning when python3 cannot be queried or reports a
#      3.x minor version below 6
#   2. uninstall any amdsmi package pip already lists
#   3. for shared-library builds, install the packaged sources with either
#      setup.py or pip, depending on the setuptools version
#   4. activate global argcomplete when an activation command is available
do_install_amdsmi_python_lib() {
    # get python version
    local python3_minor_version
    python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)')
    if [ $? -ne 0 ]; then
        echo "[WARNING] Could not determine python version. "\
             "AMD-SMI python library will not be installed."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not "\
             "supported on python version 3.$python3_minor_version. "\
             "AMD-SMI python library will not be installed."
        return
    fi

    # pip settings for running as root; python3.11 requires
    # --break-system-packages. Declared `local -x` so they are exported to the
    # pip child processes but bash restores the caller's values (including
    # "unset") on every return path, the early returns below included.
    local -x PIP_ROOT_USER_ACTION=ignore
    local -x PIP_BREAK_SYSTEM_PACKAGES=1

    # Remove old python library
    local amdsmi_pip_list_output
    amdsmi_pip_list_output=$(python3 -m pip list --format=columns --disable-pip-version-check)
    # check pip list output for amdsmi
    if [[ $amdsmi_pip_list_output == *"amdsmi"* ]]; then
        echo "Detected old AMD-SMI python library (amdsmi)..."
        python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
        echo "Removed old AMD-SMI python library (amdsmi)..."
    fi

    # static builds don't include python lib
    if [ "@BUILD_SHARED_LIBS@" != "ON" ]; then
        return
    fi

    # Picks the installation method from the setuptools version; returning
    # from this helper only skips the install, not the argcomplete step below.
    check_and_install_amdsmi() {
        local setuptools_version
        setuptools_version=$(python3 -c 'import setuptools; print(setuptools.__version__)')
        if [ $? -ne 0 ]; then
            echo "[WARNING] Could not determine setuptools version. "\
                 "AMD-SMI python library will not be installed."
            return
        fi

        # install python library at @CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@/amdsmi
        local python_lib_path=@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@
        local amdsmi_python_lib_path="$python_lib_path"
        local amdsmi_setup_py_path="$python_lib_path/setup.py"

        # Decide installation method based on setuptools version.
        # `sort -V | head -n1` yields the smaller of the two versions, so the
        # first test is true for versions up to and including 28.5 and the
        # second for versions below 41.0.1.
        if [[ "$(printf '%s\n' "$setuptools_version" "28.5" | sort -V | head -n1)" == "$setuptools_version" ]]; then
            echo "[WARNING] Setuptools version is less than 28.5. AMD-SMI will not be installed."
        elif [[ "$(printf '%s\n' "$setuptools_version" "41.0.1" | sort -V | head -n1)" != "41.0.1" ]]; then
            echo "Using setup.py for installation due to setuptools version $setuptools_version"
            # NOTE(review): setup.py runs from the current working directory,
            # not from $python_lib_path - confirm setup.py does not rely on
            # paths relative to its own location.
            python3 "$amdsmi_setup_py_path" install
        else
            echo "Using pyproject.toml for installation due to setuptools version $setuptools_version"
            python3 -m pip install "$amdsmi_python_lib_path" --quiet --disable-pip-version-check --no-build-isolation --no-index
        fi
    }

    # Call the function
    check_and_install_amdsmi

    # only try to activate argcomplete if such command exists
    # python3-argcomplete is recommended but optional, we handle its absence gracefully
    if command -v activate-global-python-argcomplete &>/dev/null; then
        activate-global-python-argcomplete 2>/dev/null || {
            echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete"
        }
    else
        # try older argcomplete3 version
        if command -v activate-global-python-argcomplete3 &>/dev/null; then
            activate-global-python-argcomplete3 2>/dev/null || {
                echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete3"
            }
        else
            echo "[WARNING] Could not find argcomplete activation command. "\
                 "Argument completion will not work. Install python3-argcomplete package to enable it."
        fi
    fi
}
|
||||
|
||||
|
||||
# Dispatch on the maintainer-script action passed as $1.
case "$1" in
    configure)
        do_install_amdsmi_python_lib
        do_ldconfig
        # a logrotate setup failure must not fail the package installation
        do_configureLogrotate || exit 0
        ;;
    abort-upgrade|abort-remove|abort-deconfigure)
        echo "$1"
        ;;
    *)
        exit 0
        ;;
esac
|
||||
Executable
+136
@@ -0,0 +1,136 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
echo "Removing AMDSMI LIB Packages..."
|
||||
|
||||
# Other prerm actions
|
||||
# Remove the AMD SMI entry from /etc/ld.so.conf.d and refresh the linker cache.
# Does nothing unless the package was built with ENABLE_LDCONFIG=ON.
rm_ldconfig() {
    # "@ENABLE_LDCONFIG@" is substituted with ON/OFF at package build time
    [ "@ENABLE_LDCONFIG@" == "ON" ] || return 0

    rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
}
|
||||
|
||||
|
||||
# Delete files generated after installation (python byte-code caches, setuptools
# build/egg-info directories and leftover documentation) so that nothing is left
# behind under the install prefix.
rm_leftovers() {
    local share_dir="@CPACK_PACKAGING_INSTALL_PREFIX@/@SHARE_INSTALL_PREFIX@"

    # remove pyc files generated by python
    rm -rf "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
    rm -rf "$share_dir/amdsmi/__pycache__"

    # remove build and egg files
    rm -rf "$share_dir/amdsmi.egg-info"
    rm -rf "$share_dir/build"

    # remove leftover doc files
    # The glob has to stay OUTSIDE the quotes: a quoted "amd_smi*" is taken
    # literally, so the previous test/rm pair never matched anything.
    # With -f, rm stays silent when the pattern matches no file.
    rm -rf "$share_dir/../doc/"amd_smi*
}
|
||||
|
||||
# Delete the AMD SMI log directory together with everything inside it.
rm_logFolder() {
    local amdsmi_log_dir=/var/log/amd_smi_lib
    rm -rf "$amdsmi_log_dir"
}
|
||||
|
||||
|
||||
# Remove the installed AMD SMI tests directory, if it exists, and report it.
rm_rocm_tests_dir() {
    local tests_dir="@CPACK_PACKAGING_INSTALL_PREFIX@/share/amd_smi/tests/"

    # nothing to do when the tests were never installed
    [ -d "$tests_dir" ] || return 0

    rm -rf "$tests_dir"
    echo "Removed ROCm tests directory."
}
|
||||
|
||||
|
||||
# Undo the logrotate changes made for AMD SMI logging:
#   1. delete the AMD SMI logrotate rule,
#   2. move the logrotate cron job from cron.hourly back to cron.daily,
#   3. restore logrotate.timer from its ".backup" copy and re-enable it.
# Each step is skipped when the file it operates on does not exist.
# NOTE(review): presumably the .backup file and the hourly cron job are created
# by this package's post-install script - confirm against that script.
return_logrotateToOrigConfig() {
    local amdsmiRotateRule=/etc/logrotate.d/amd_smi.conf
    local hourlyCronJob=/etc/cron.hourly/logrotate
    local timerUnit=/lib/systemd/system/logrotate.timer
    local timerBackup="${timerUnit}.backup"

    if [ -f "$amdsmiRotateRule" ]; then
        rm -rf "$amdsmiRotateRule"
    fi

    if [ -f "$hourlyCronJob" ]; then
        mv "$hourlyCronJob" /etc/cron.daily/logrotate
    fi

    if [ -f "$timerBackup" ]; then
        cp "$timerBackup" "$timerUnit"
        rm -rf "$timerBackup"
        # reload the restored unit and restart the timer right away
        systemctl reenable --now logrotate.timer
    fi
}
|
||||
|
||||
# Uninstall the "amdsmi" python package through pip and report the outcome.
# Bails out with a warning when python3 cannot be queried or is older than 3.6.
rm_python_lib() {
    # get python version
    local python3_minor_version
    if ! python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Remove old python library when pip lists a package containing "amdsmi"
    local installed_pkgs
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # Query pip again to verify the result of the uninstall
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
|
||||
|
||||
|
||||
# Maintainer-script entry point: dispatch on the action name passed as "$1".
case "$1" in
    remove | upgrade)
        # remove old gpuv-smi symlink
        # (path quoted so an install prefix containing spaces or glob
        # characters is passed to rm as a single argument)
        rm -f "@CPACK_PACKAGING_INSTALL_PREFIX@/bin/gpuv-smi" &> /dev/null
        echo "Removing AMDSMI Lib Packages..."
        rm_python_lib
        echo "python library removed"
        rm_ldconfig
        echo "ldconfig removed"
        rm_leftovers
        echo "leftovers removed"
        rm_logFolder
        echo "log folder removed"
        rm_rocm_tests_dir
        echo "rocm tests directory removed"
        return_logrotateToOrigConfig
        echo "logrotate configuration restored"
        ;;
    purge)
        # nothing to do on purge
        ;;
    *)
        # any other action is ignored
        exit 0
        ;;
esac
|
||||
@@ -0,0 +1 @@
|
||||
/opt/rocm/lib
|
||||
@@ -0,0 +1,39 @@
|
||||
# Use rocm/dev-ubuntu-22.04 as the base image
FROM rocm/dev-ubuntu-22.04

# Set environment variables for build directories and package patterns.
# DEB_BUILD is the glob used below to pick the package to install;
# DEB_BUILD_TEST is not referenced elsewhere in this Dockerfile.
ENV BUILD_FOLDER=/home/amdsmi/build
ENV DEB_BUILD="amd-smi-lib*99999-local_amd64.deb"
ENV DEB_BUILD_TEST="amd-smi-lib-tests*99999-local_amd64.deb"

# Set the working directory to /home
WORKDIR /home

# Install necessary system packages.
# apt-get is used rather than apt: apt is meant for interactive use and its
# command-line interface is not guaranteed to be stable in scripts.
RUN apt-get update && apt-get install -y git build-essential rpm pkg-config g++ python3 python3-pip python3-wheel python3-setuptools

# Upgrade pip and install cmake and virtualenv using pip
RUN python3 -m pip install --upgrade pip setuptools && \
    python3 -m pip install cmake virtualenv

# Clone the AMD SMI repository from GitHub
RUN git clone -b amd-mainline https://github.com/ROCm/amdsmi.git

# Navigate to the amdsmi directory
WORKDIR /home/amdsmi

# Build and Install AMDSMI.
# ln -sf replaces an already existing /usr/local/bin/amd-smi link instead of failing.
RUN rm -rf ${BUILD_FOLDER} && \
    mkdir -p ${BUILD_FOLDER} && \
    cd ${BUILD_FOLDER} && \
    cmake .. -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON && \
    make -j $(nproc) VERBOSE=1 && \
    make package && \
    sudo apt-get install -y --allow-downgrades ${BUILD_FOLDER}/${DEB_BUILD} && \
    sudo ln -sf /opt/rocm/bin/amd-smi /usr/local/bin

# Verify the installation of Python packages related to AMD SMI
RUN python3 -m pip list | grep -E "amd|pip|setuptools"

# Set the entrypoint to bash for interactive use
ENTRYPOINT ["/bin/bash"]
|
||||
@@ -0,0 +1,19 @@
|
||||
Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,221 @@
|
||||
# AMD System Management Interface (AMD SMI) library
|
||||
|
||||
The AMD System Management Interface (AMD SMI) library offers a unified tool for managing and monitoring GPUs,
|
||||
particularly in high-performance computing environments. It provides a user-space interface that allows applications to
|
||||
control GPU operations, monitor performance, and retrieve information about the system's drivers and GPUs.
|
||||
|
||||
For information on available features, installation steps, API reference material, and helpful tips, refer to the online
|
||||
documentation at [rocm.docs.amd.com/projects/amdsmi](https://rocm.docs.amd.com/projects/amdsmi/en/latest/)
|
||||
|
||||
>[!NOTE]
|
||||
>This project is a successor to [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib)
|
||||
>and [esmi_ib_library](https://github.com/amd/esmi_ib_library).
|
||||
>This project is applicable to Linux bare metal and Linux VM (guest) environments. To use AMD SMI for virtualization, refer to [AMD-SMI Virtualization](https://github.com/amd/MxGPU-Virtualization/tree/mainline/smi-lib).
|
||||
|
||||
## Supported platforms
|
||||
|
||||
The AMD SMI library supports Linux bare metal and Linux virtual machine guest
|
||||
for AMD GPUs and AMD EPYC™ CPUs via
|
||||
[esmi_ib_library](https://github.com/amd/esmi_ib_library).
|
||||
|
||||
AMD SMI library can run on AMD ROCm supported platforms, refer to
|
||||
[System requirements (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html)
|
||||
for more information.
|
||||
|
||||
## Installation
|
||||
|
||||
* [Install the AMD SMI library and CLI tool](https://rocm.docs.amd.com/projects/amdsmi/en/latest/install/install.html)
|
||||
|
||||
## Requirements
|
||||
|
||||
The following are required to install and use the AMD SMI library through its language interfaces and CLI.
|
||||
|
||||
* `amdgpu` driver must be loaded for [`amdsmi_init()`](./docs/how-to/amdsmi-cpp-lib#hello-amd-smi) to work. Refer to the [Instinct documentation](https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/install/detailed-install/prerequisites.html) for installation instructions.
|
||||
* Add the `amdsmi` library installation directories to `LD_LIBRARY_PATH`.
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64
|
||||
```
|
||||
|
||||
### Python interface and CLI tool prerequisites
|
||||
|
||||
* Python 3.6.8+ (64-bit)
|
||||
|
||||
### Note: No module named more_itertools warning on Azure Linux 3
|
||||
During the driver installation process on Azure Linux 3, you might encounter the `ModuleNotFoundError: No module named 'more_itertools'` warning. This warning is a result of the reintroduction of `python3-wheel` and `python3-setuptools` dependencies in the CMake of AMD SMI, which requires `more_itertools` to build these Python libraries. This issue will be fixed in a future ROCm release. As a workaround, use the following command before installation:
|
||||
```bash
|
||||
sudo python3 -m pip install more_itertools
|
||||
```
|
||||
|
||||
### Go API prerequisites
|
||||
|
||||
* Go version 1.20 or greater
|
||||
|
||||
## AMD SMI basic usage
|
||||
|
||||
### C++ library
|
||||
|
||||
For developers focused on performance monitoring, system diagnostics, or resource management, the AMD SMI C++ library
|
||||
offers a powerful and versatile tool to unlock the full capabilities of AMD hardware.
|
||||
|
||||
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-cpp-lib.html) and the
|
||||
detailed [C++ API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-cpp-api.html) in the
|
||||
ROCm documentation portal.
|
||||
|
||||
### Python library
|
||||
|
||||
The AMD SMI Python interface provides an easy-to-use
|
||||
[API](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-lib.html) for interacting with AMD
|
||||
hardware. It simplifies tasks like monitoring and controlling GPU operations, allowing for rapid development.
|
||||
|
||||
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-py-lib.html) and the
|
||||
detailed [Python API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-py-api.html) in the
|
||||
ROCm documentation portal.
|
||||
|
||||
### Go library
|
||||
|
||||
The AMD SMI Go interface provides a simple
|
||||
[API](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-go-lib.html)
|
||||
for AMD hardware management. It streamlines hardware monitoring and control
|
||||
while leveraging Golang's features.
|
||||
|
||||
Refer to the [user guide](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-go-lib.html) and the
|
||||
[Go API reference](https://rocm.docs.amd.com/projects/amdsmi/en/latest/reference/amdsmi-go-api.html) in the
|
||||
ROCm documentation portal.
|
||||
|
||||
### CLI tool
|
||||
|
||||
A versatile command line tool for managing and monitoring AMD hardware. You can use `amd-smi` for:
|
||||
|
||||
- Device information: Quickly retrieve detailed information about AMD GPUs
|
||||
|
||||
- Performance monitoring: Real-time monitoring of GPU utilization, memory, temperature, and power consumption
|
||||
|
||||
- Process information: Identify which processes are using GPUs
|
||||
|
||||
- Configuration management: Adjust GPU settings like clock speeds and power limits
|
||||
|
||||
- Error reporting: Monitor and report GPU errors for proactive maintenance
|
||||
|
||||
Check out
|
||||
[Getting to Know Your GPU: A Deep Dive into AMD SMI -- ROCm Blogs](https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html)
|
||||
for a rundown.
|
||||
|
||||
### Docker container configuration
|
||||
|
||||
To ensure proper functionality of AMD SMI within a Docker container, the
|
||||
following configuration options must be included. These settings are
|
||||
particularly important for managing memory partitions, as partitioning depends
|
||||
on loading and unloading kernel drivers.
|
||||
|
||||
- `--cap-add=SYS_MODULE`
|
||||
|
||||
- `-v /lib/modules:/lib/modules`
|
||||
|
||||
See [Using AMD SMI in a Docker
|
||||
container](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/setup-docker-container.html)
|
||||
for more information.
|
||||
|
||||
## Building AMD SMI
|
||||
|
||||
This section describes the prerequisites and steps to build AMD SMI from source.
|
||||
|
||||
### Required software
|
||||
|
||||
To build the AMD SMI library, the following components are required. Note that the software versions specified were used
|
||||
during development; earlier versions are not guaranteed to work.
|
||||
|
||||
* CMake (v3.20.0 or later) -- `python3 -m pip install cmake`
|
||||
* g++ (v5.4.0 or later)
|
||||
* libdrm-dev (for Ubuntu and Debian)
|
||||
* libdrm-devel (for RPM-based distributions)
|
||||
|
||||
In order to build the AMD SMI Python package, the following components are required:
|
||||
|
||||
* Python (3.6.8 or later)
|
||||
* virtualenv -- `python3 -m pip install virtualenv`
|
||||
|
||||
### Build steps
|
||||
|
||||
1. Clone the AMD SMI repository to your local Linux machine.
|
||||
|
||||
```shell
|
||||
git clone https://github.com/ROCm/amdsmi.git
|
||||
```
|
||||
|
||||
2. The default installation location for the library and headers is `/opt/rocm`. Before installation, any old ROCm
|
||||
directories should be deleted:
|
||||
|
||||
* `/opt/rocm`
|
||||
* `/opt/rocm-<version_number>`
|
||||
|
||||
3. Build the library by following the typical CMake build sequence (run as root user or use `sudo` before `make install`
|
||||
command); for instance:
|
||||
|
||||
```bash
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j $(nproc)
|
||||
make install
|
||||
```
|
||||
|
||||
The built library is located in the `build/` directory. To build the `rpm` and `deb` packages use the following
|
||||
command:
|
||||
|
||||
```bash
|
||||
make package
|
||||
```
|
||||
|
||||
### Rebuild the Python wrapper
|
||||
|
||||
The Python wrapper for the AMD SMI library is found in the [auto-generated file](#py_lib_fs)
|
||||
`py-interface/amdsmi_wrapper.py`. It is essential to regenerate this wrapper whenever there are changes to the C++ API.
|
||||
It is not regenerated automatically.
|
||||
|
||||
To regenerate the wrapper, use the following command.
|
||||
|
||||
```shell
|
||||
./update_wrapper.sh
|
||||
```
|
||||
|
||||
After running this command, the file `py-interface/amdsmi_wrapper.py` is updated the next time the project is compiled.
|
||||
|
||||
>[!NOTE]
|
||||
>You need Docker installed on your system to regenerate the Python wrapper.
|
||||
|
||||
### Build the tests
|
||||
|
||||
To verify the build and capabilities of AMD SMI on your system, as well as to see practical examples of its usage, you
|
||||
can build and run the available [tests in the repository](https://github.com/ROCm/amdsmi/tree/amd-staging/tests). Follow
|
||||
these steps to build the tests:
|
||||
|
||||
```bash
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DBUILD_TESTS=ON ..
|
||||
make -j $(nproc)
|
||||
```
|
||||
|
||||
#### Run the tests
|
||||
|
||||
Once the tests are [built](#build-the-tests), you can run them by executing the `amdsmitst` program. The executable can
|
||||
be found at `build/tests/amd_smi_test/`.
|
||||
|
||||
### Build the docs
|
||||
|
||||
To build the documentation, follow the instructions at
|
||||
[Building documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html).
|
||||
|
||||
## DISCLAIMER
|
||||
|
||||
The information contained herein is for informational purposes only, and is subject to change without notice. In
|
||||
addition, any stated support is planned and is also subject to change. While every precaution has been taken in the
|
||||
preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is
|
||||
under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no
|
||||
representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes
|
||||
no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular
|
||||
purposes, with respect to the operation or use of AMD hardware, software or other products described herein.
|
||||
|
||||
© 2023-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
Executable
+205
@@ -0,0 +1,205 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Prepare AMD SMI logging and make logrotate check the log hourly.
#   1. Create /var/log/amd_smi_lib and the log file, readable/writable by all.
#   2. If logrotate is missing, warn and stop.
#   3. Write /etc/logrotate.d/amd_smi.conf unless it already exists.
#   4. Switch logrotate to an hourly schedule: rewrite logrotate.timer (after
#      saving a one-time ".backup" copy) when a systemd logrotate timer is
#      listed, otherwise move the cron job from cron.daily to cron.hourly.
do_configureLogrotate() {
    local IS_SYSTEMD=0
    local packageName="amd-smi-lib"
    local logPath=/var/log/amd_smi_lib
    local logFile="${logPath}/AMD-SMI-lib.log"
    local logrotateConfFile=/etc/logrotate.d/amd_smi.conf
    local timerUnit=/lib/systemd/system/logrotate.timer

    mkdir -p "${logPath}"
    touch "${logFile}"
    chmod -R a+rw "${logPath}"
    chmod a+rw "${logFile}"

    if ! command -v logrotate &>/dev/null; then
        echo "[WARNING] Detected logrotate is not installed."\
            "$packageName logs (when turned on) will not rotate properly."
        return
    fi

    if [ ! -f "${logrotateConfFile}" ]; then
        touch "${logrotateConfFile}"
        chmod 644 "${logrotateConfFile}" # root r/w, all others read
        # AMD SMI logging rotation, rotates files using root user/group
        # Hourly logrotation check
        # Only rotates if size grew larger than 1MB
        # Max of 4 rotation files, oldest will be removed
        # Rotated files use date extension of ex. AMD-SMI-lib.log.2023-05-09_16:51:42
        # Every '%' is written as '%%' (including %%H, for consistency) and
        # collapsed back to '%' by the sed call below.
        cat << EOF > "${logrotateConfFile}"
${logFile} {
    su root root
    hourly
    missingok
    notifempty
    rotate 4
    size 1M
    copytruncate
    dateext
    dateformat .%%Y-%%m-%%d_%%H:%%M:%%S
}
EOF
        # Fix for % S argument not found (now we escape with %%)
        # issue was RPM build thought we were using macros
        # https://gitlab.kitware.com/cmake/cmake/-/issues/22965
        # https://rpm-software-management.github.io/rpm/manual/spec.html
        sed -i s/%%/%/g "${logrotateConfFile}"
        # workaround: remove extra 'OURCE' text
        # from amd_smi.conf. Unsure if CMAKE,
        # bash, or here document
        # issue (only seen on RHEL 8.7)
        sed -i s/OURCE//g "${logrotateConfFile}"
    fi

    # check if logrotate uses system timers, Ubuntu/modern OS's do
    # Several older OS's like RHEL 8.7, do not. Instead defaults
    # to use daily cron jobs - see https://stackoverflow.com/a/69465677
    if [ -d /run/systemd/system ] && systemctl list-timers | grep -iq logrotate; then
        IS_SYSTEMD=1
    fi

    if [ "$IS_SYSTEMD" -eq 1 ]; then
        # Configure systemd timers - the typical setup for modern Linux logrotation setups
        if [ -f "${timerUnit}" ]; then
            # keep the first backup only, so a re-run never overwrites the
            # original unit with the hourly one written below
            if [ ! -f "${timerUnit}.backup" ]; then
                cp "${timerUnit}" "${timerUnit}.backup"
            fi
            cat << EOF > "${timerUnit}"
[Unit]
Description=Hourly rotation of log files
Documentation=man:logrotate(8) man:logrotate.conf(5)

[Timer]
OnCalendar=
OnCalendar=hourly
AccuracySec=1m
Persistent=true

[Install]
WantedBy=timers.target
EOF
            systemctl reenable --now logrotate.timer
        else
            echo "[WARNING] Could not configure systemd timer for $packageName's logrotate."\
                "$packageName logs (when turned on) will not rotate properly."
        fi
    else
        # no systemd logrotate timer: fall back to cron
        if [ -f /etc/cron.daily/logrotate ]; then
            # move logrotate daily to hourly
            if [ -d /etc/cron.hourly ]; then
                mv /etc/cron.daily/logrotate /etc/cron.hourly/logrotate
            fi
        fi
    fi
}
|
||||
|
||||
# Register the AMD SMI library directory with the dynamic linker by writing it
# to /etc/ld.so.conf.d and refreshing the linker cache.
# Does nothing unless the package was built with ENABLE_LDCONFIG=ON.
do_ldconfig() {
    # left-hand term originates from ENABLE_LDCONFIG = ON/OFF at package build
    if [ "@ENABLE_LDCONFIG@" == "ON" ]; then
        # quoted so the prefix is written verbatim (no word splitting or
        # glob expansion of the install path)
        echo "$RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_LIBDIR@" > /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
        ldconfig
    fi
}
|
||||
|
||||
# Install the AMD SMI python library shipped under
# $RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@ and try to enable argcomplete.
#   1. Require python3 >= 3.6, otherwise warn and return.
#   2. Uninstall a previously installed "amdsmi" pip package.
#   3. Return early for static builds (they do not ship the python library).
#   4. Install via setup.py or pip, depending on the setuptools version.
#   5. Activate global argcomplete when an activation command is available.
# The pip overrides (PIP_ROOT_USER_ACTION, PIP_BREAK_SYSTEM_PACKAGES) are passed
# per command instead of being exported, so the caller's environment is left
# untouched on every return path.
do_install_amdsmi_python_lib() {
    # get python version
    local python3_minor_version
    if ! python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not "\
            "supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be installed."
        return
    fi

    # Remove old python library
    # (python3.11 requires --break-system-packages, hence PIP_BREAK_SYSTEM_PACKAGES=1)
    local amdsmi_pip_list_output
    amdsmi_pip_list_output=$(PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 \
        python3 -m pip list --format=columns --disable-pip-version-check)
    # check pip list output for amdsmi
    if [[ $amdsmi_pip_list_output == *"amdsmi"* ]]; then
        echo "Detected old AMD-SMI python library (amdsmi)..."
        PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 \
            python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
        echo "Removed old AMD-SMI python library (amdsmi)..."
    fi

    # static builds don't include python lib
    if [ "@BUILD_SHARED_LIBS@" != "ON" ]; then
        return
    fi

    # Pick the installation method from the setuptools version and run it.
    check_and_install_amdsmi() {
        local setuptools_version
        if ! setuptools_version=$(python3 -c 'import setuptools; print(setuptools.__version__)'); then
            echo "[WARNING] Could not determine setuptools version. "\
                "AMD-SMI python library will not be installed."
            return
        fi

        # install python library at $RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@/amdsmi
        local amdsmi_python_lib_path="$RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@"

        # Decide installation method based on setuptools version
        # ("sort -V | head -n1" yields the smaller of the two versions)
        if [[ "$(printf '%s\n' "$setuptools_version" "28.5" | sort -V | head -n1)" == "$setuptools_version" ]]; then
            echo "[WARNING] Setuptools version is less than 28.5. AMD-SMI will not be installed."
        elif [[ "$(printf '%s\n' "$setuptools_version" "41.0.1" | sort -V | head -n1)" != "41.0.1" ]]; then
            echo "Using setup.py for installation due to setuptools version $setuptools_version"
            # Subshell: the directory change cannot leak into the caller, and
            # setup.py is only run when the cd actually succeeded.
            (
                cd "$amdsmi_python_lib_path" &&
                    PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 python3 setup.py install
            )
        else
            echo "Using pyproject.toml for installation due to setuptools version $setuptools_version"
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 \
                python3 -m pip install "$amdsmi_python_lib_path" --quiet --disable-pip-version-check --no-build-isolation --no-index
        fi
    }

    # Call the function
    check_and_install_amdsmi

    # only try to activate argcomplete if such command exists
    # python3-argcomplete is recommended but optional, we handle its absence gracefully
    if command -v activate-global-python-argcomplete &>/dev/null; then
        activate-global-python-argcomplete 2>/dev/null || {
            echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete"
        }
    elif command -v activate-global-python-argcomplete3 &>/dev/null; then
        # try older argcomplete3 version
        activate-global-python-argcomplete3 2>/dev/null || {
            echo "[INFO] Bash completion activation skipped. You can manually enable it with: activate-global-python-argcomplete3"
        }
    else
        echo "[WARNING] Could not find argcomplete activation command. "\
            "Argument completion will not work. Install python3-argcomplete package to enable it."
    fi
}
|
||||
|
||||
|
||||
# post install or upgrade, $1 is 1 or 2 -> do these actions
# A missing argument is treated as 0, so the test cannot fail with
# "integer expression expected".
if [ "${1:-0}" -ge 1 ]; then
    do_install_amdsmi_python_lib
    do_ldconfig
    # a failing logrotate setup ends the scriptlet with status 0
    do_configureLogrotate || exit 0
fi
|
||||
Executable
+8
@@ -0,0 +1,8 @@
|
||||
#!/bin/bash
|
||||
|
||||
# second term originates from ENABLE_LDCONFIG = ON/OFF at package build
# Only act on a real removal ($1=0). During an upgrade ($1=1) this scriptlet
# belongs to the OLD package and runs after the NEW package's post-install
# script, so deleting the ld.so.conf.d entry here would wipe the entry the new
# package has just written.
if [ "$1" -eq 0 ] && [ "@ENABLE_LDCONFIG@" == "ON" ]; then
    rm -f /etc/ld.so.conf.d/x86_64-libamd_smi_lib.conf
    ldconfig
fi
|
||||
Executable
+95
@@ -0,0 +1,95 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Delete files generated after installation (python byte-code caches, setuptools
# build/egg-info/dist directories and leftover documentation) so that nothing is
# left behind under the install prefix.
rm_leftovers() {
    local share_dir="$RPM_INSTALL_PREFIX0/@SHARE_INSTALL_PREFIX@"

    # remove pyc files generated by python
    rm -rf "$RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_LIBEXECDIR@/amdsmi_cli/__pycache__"
    rm -rf "$share_dir/amdsmi/__pycache__"

    # remove build and egg files
    rm -rf "$share_dir/amdsmi.egg-info"
    rm -rf "$share_dir/build"

    # remove dist files (only applies to old setuptools versions like on RHEL8)
    rm -rf "$share_dir/dist"

    # remove leftover doc files
    # The glob has to stay OUTSIDE the quotes: a quoted "amd_smi*" is taken
    # literally, so the previous test/rm pair never matched anything.
    # With -f, rm stays silent when the pattern matches no file.
    rm -rf "$share_dir/../doc/"amd_smi*
}
|
||||
|
||||
|
||||
# Delete the AMD SMI log directory together with everything inside it.
rm_logFolder() {
    local amdsmi_log_dir=/var/log/amd_smi_lib
    rm -rf "$amdsmi_log_dir"
}
|
||||
|
||||
|
||||
# Remove the installed AMD SMI tests directory, if it exists, and report it.
rm_rocm_tests_dir() {
    local tests_dir="$RPM_INSTALL_PREFIX0/share/amd_smi/tests/"

    # nothing to do when the tests were never installed
    [ -d "$tests_dir" ] || return 0

    rm -rf "$tests_dir"
    echo "Removed ROCm tests directory."
}
|
||||
|
||||
|
||||
# Undo the logrotate changes made for AMD SMI logging:
#   1. delete the AMD SMI logrotate rule,
#   2. move the logrotate cron job from cron.hourly back to cron.daily,
#   3. restore logrotate.timer from its ".backup" copy and re-enable it.
# Each step is skipped when the file it operates on does not exist.
return_logrotateToOrigConfig() {
    local amdsmiRotateRule=/etc/logrotate.d/amd_smi.conf
    local hourlyCronJob=/etc/cron.hourly/logrotate
    local timerUnit=/lib/systemd/system/logrotate.timer
    local timerBackup="${timerUnit}.backup"

    if [ -f "$amdsmiRotateRule" ]; then
        rm -rf "$amdsmiRotateRule"
    fi

    if [ -f "$hourlyCronJob" ]; then
        mv "$hourlyCronJob" /etc/cron.daily/logrotate
    fi

    if [ -f "$timerBackup" ]; then
        cp "$timerBackup" "$timerUnit"
        rm -rf "$timerBackup"
        # reload the restored unit and restart the timer right away
        systemctl reenable --now logrotate.timer
    fi
}
|
||||
|
||||
# Uninstall the "amdsmi" python package through pip and report the outcome.
# Bails out with a warning when python3 cannot be queried or is older than 3.6.
rm_python_lib() {
    # get python version
    local python3_minor_version
    if ! python3_minor_version=$(python3 -c 'import sys;print(sys.version_info.minor)'); then
        echo "[WARNING] Could not determine python version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # check if python version is supported
    if [ "$python3_minor_version" -lt 6 ]; then
        echo "[WARNING] AMD-SMI python library is not supported on python version 3.$python3_minor_version. "\
            "AMD-SMI python library will not be uninstalled."
        return
    fi

    # Remove old python library when pip lists a package containing "amdsmi"
    local installed_pkgs
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            PIP_ROOT_USER_ACTION=ignore PIP_BREAK_SYSTEM_PACKAGES=1 python3 -m pip uninstall amdsmi --yes --quiet --disable-pip-version-check
            ;;
    esac

    # Query pip again to verify the result of the uninstall
    installed_pkgs=$(python3 -m pip list --format=columns --disable-pip-version-check)
    case "$installed_pkgs" in
        *amdsmi*)
            echo "[WARNING] AMD-SMI python library (amdsmi) is still installed in pip. "\
                "Check post install to ensure version is correct"
            ;;
        *)
            echo "Removed AMD-SMI python library (amdsmi)..."
            ;;
    esac
}
|
||||
|
||||
|
||||
# perform the below actions for rpm remove($1=0) or upgrade($1=1) operations
# A missing argument is treated as 0 (remove), so the test cannot fail with
# "integer expression expected".
# NOTE(review): during an rpm upgrade the old package's pre-uninstall script
# runs after the new package's post-install script, so the cleanup below may
# undo work just done by the new package (e.g. the python library install) -
# confirm whether $1=1 should really be handled here.
if [ "${1:-0}" -le 1 ]; then
    # remove old gpuv-smi symlink
    # (path quoted so a relocated prefix containing spaces stays one argument)
    rm -f "$RPM_INSTALL_PREFIX0/bin/gpuv-smi" &> /dev/null
    rm_python_lib
    rm_leftovers
    rm_logFolder
    rm_rocm_tests_dir
    return_logrotateToOrigConfig
fi
|
||||
@@ -0,0 +1,27 @@
|
||||
# - Config file for the amd_smi package
# It defines the following variables
#  AMD_SMI_INCLUDE_DIRS - include directories for amd_smi
#  AMD_SMI_LIBRARIES    - libraries to link against
# The include and library directories are also published under the singular
# and lower-case spellings set below.

# Compute paths
@PACKAGE_INIT@
get_filename_component(AMD_SMI_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)

# Publish (and verify the existence of) the include directory, then the
# library directory, under every supported variable name.
foreach(_amd_smi_var amd_smi_INCLUDE_DIR AMD_SMI_INCLUDE_DIR AMD_SMI_INCLUDE_DIRS)
  set_and_check(${_amd_smi_var} "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@")
endforeach()
foreach(_amd_smi_var amd_smi_LIB_DIR AMD_SMI_LIB_DIR AMD_SMI_LIB_DIRS)
  set_and_check(${_amd_smi_var} "@PACKAGE_CMAKE_INSTALL_LIBDIR@")
endforeach()
unset(_amd_smi_var)

# Our library dependencies (contains definitions for IMPORTED targets)
# Skipped when the amd_smi target already exists or when configuring from
# inside the amd_smi build tree itself.
if(NOT TARGET amd_smi AND NOT amd_smi_BINARY_DIR)
  include("${AMD_SMI_CMAKE_DIR}/amd_smiTargets.cmake")
endif()

# These are IMPORTED targets created by amd_smiTargets.cmake
# TODO: Need to check if OAM libraries are needed here!
set(AMD_SMI_LIBRARY amd_smi)
set(AMD_SMI_LIBRARIES amd_smi)

check_required_components(amd_smi)
|
||||
@@ -0,0 +1,128 @@
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
class BDF():
    """PCI address (segment, bus, device, function) with pythonic casting and comparison.

    Useful for validating a BDF string and converting it to a BDF object.
    This allows us to handle BDF objects in a pythonic way: they can be compared
    with ``==``/``!=``/``in``, hashed, iterated and concatenated with strings.

    Attributes:
        segment: Segment number, at most 65535; 0 when the string has no segment field.
        bus: Bus number, at most 255.
        device: Device number, at most 31.
        function: Function number, at most 7.
    """
    def __init__(self, bdf):
        """Init a BDF object from another BDF or from a BDF string.

        Args:
            bdf: A BDF instance (its four fields are copied) or a string of
                hexadecimal fields separated by ':' or '.', for example
                "0000:01:00.0" or "01:00.0". The repr form "BDF(...)" is
                accepted as well.

        Raises:
            ValueError: A field is not a hexadecimal number, or fewer than
                three fields were given.
            BDF.BDFError: A field exceeds its maximum value.
        """
        if isinstance(bdf, BDF):
            self.segment, self.bus, self.device, self.function = tuple(bdf)
        else:
            if bdf.startswith("BDF("):
                bdf = bdf.replace('BDF(', '').replace(')', '')

            try:
                bdf_components = [int(x, 16) for x in re.split('[:.]', bdf)]
            except ValueError:
                # int() reports a malformed field with ValueError; catching
                # BDFError here would never match and the log line would be lost.
                logging.error(f"Invalid string passed: {bdf}")
                raise

            # A four-field string carries the segment first; otherwise it defaults to 0.
            self.segment = bdf_components[0] if len(bdf_components) == 4 else 0
            self.bus, self.device, self.function = bdf_components[-3:]
            if self.segment > 65535:
                raise self.BDFError("Segment can't be greater than 65535")
            if self.bus > 255:
                raise self.BDFError("Bus can't be greater than 255")
            if self.device > 31:
                raise self.BDFError("Device can't be greater than 31")
            if self.function > 7:
                raise self.BDFError("Function can't be greater than 7")

    class BDFError(Exception):
        """BDF Class Error"""

    def __eq__(self, passed_bdf):
        """Overrides the == operator and allows for BDF objects to be compared to BDF strings

        An empty string and any value that is neither a string nor a BDF compare
        unequal. A non-empty string is parsed with BDF(), so a malformed string
        raises the same errors as the constructor.
        """
        # Only accept strings and BDF objects
        if isinstance(passed_bdf, str):
            if passed_bdf == '':
                return False
            passed_bdf = BDF(passed_bdf)
        elif not isinstance(passed_bdf, BDF):
            return False

        return (self.segment == passed_bdf.segment and
                self.bus == passed_bdf.bus and
                self.device == passed_bdf.device and
                self.function == passed_bdf.function)

    def __ne__(self, passed_bdf):
        """Overrides the != operator and allows for BDF objects to be compared to BDF strings"""
        # Since == is overridden above, != is simply its negation
        return not self == passed_bdf

    def __add__(self, passed_bdf):
        """Overrides the + operator and allows for string concatenation (BDF + str)"""
        return str(self) + passed_bdf

    def __radd__(self, passed_bdf):
        """Overrides the + operator and allows for string concatenation (str + BDF)"""
        return passed_bdf + str(self)

    def __str__(self):
        """Cast BDF object to a string of upper-case hex fields joined by ':'"""
        return "{:04X}:{:02X}:{:02X}:{}".format(self.segment, self.bus, self.device, self.function)

    def __repr__(self):
        """How the BDF object is represented"""
        return f"BDF({self})"

    def __hash__(self):
        """Allow the BDF object to be hashable"""
        return hash(str(self))

    def __iter__(self):
        """Make the BDF object iterable over its 4 values"""
        yield from (self.segment, self.bus, self.device, self.function)

    def __contains__(self, passed_bdf):
        """Overrides the 'in' operator: True when passed_bdf denotes this same address.

        Args:
            passed_bdf: A BDF instance or a BDF string.

        Raises:
            ValueError, BDF.BDFError: passed_bdf is a string that BDF() rejects.
        """
        # Compare the parsed value directly. Round-tripping through str() and a
        # regex expecting "dd.f" could never match, because __str__ separates
        # the function with ':'.
        return self == BDF(passed_bdf)
|
||||
@@ -0,0 +1,80 @@
|
||||
# Build and install rules for the AMD SMI command line tool: the CLI python
# sources are staged into the build tree, installed under PY_CLI_INSTALL_DIR,
# and exposed through an `amd-smi` symlink.
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" CMake AMDSMI CLI Install ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")

# Set CLI Build Directory
# PY_PACKAGE_DIR is the staging sub-directory in the build tree and also the
# directory name used below the install destination.
set(PY_PACKAGE_DIR "amdsmi_cli")
set(PY_CLI_INSTALL_DIR "${CMAKE_INSTALL_LIBEXECDIR}" CACHE STRING "CLI tool installation directory")

# populate version string
# @ONLY restricts substitution to @VAR@ references in _version.py.in.
configure_file(_version.py.in ${PY_PACKAGE_DIR}/_version.py @ONLY)

# copy only if files are different
# Stages every CLI source file from the source directory into
# ${PY_PACKAGE_DIR}/ in the build tree.
# NOTE(review): `DEPENDS amdsmi_cli` uses the same name as the custom target
# defined below, which itself depends on these outputs - confirm the intended
# dependency.
add_custom_command(
    OUTPUT ${PY_PACKAGE_DIR}/__init__.py
           ${PY_PACKAGE_DIR}/amdsmi_cli.py
           ${PY_PACKAGE_DIR}/amdsmi_commands.py
           ${PY_PACKAGE_DIR}/amdsmi_helpers.py
           ${PY_PACKAGE_DIR}/amdsmi_init.py
           ${PY_PACKAGE_DIR}/amdsmi_logger.py
           ${PY_PACKAGE_DIR}/amdsmi_parser.py
           ${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
           ${PY_PACKAGE_DIR}/BDF.py
           ${PY_PACKAGE_DIR}/README.md
           ${PY_PACKAGE_DIR}/Release_Notes.md
    DEPENDS amdsmi_cli
    COMMAND mkdir -p ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/__init__.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_cli.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_commands.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_helpers.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_init.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_logger.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_parser.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_cli_exceptions.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/BDF.py ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/README.md ${PY_PACKAGE_DIR}/
    COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/Release_Notes.md ${PY_PACKAGE_DIR}/)

# The CLI requires the python amdsmi wrapper to be installed
# Built by default (ALL); depends on the python_package target, the staged
# files above and the generated _version.py.
add_custom_target(
    amdsmi_cli ALL
    DEPENDS python_package
            ${PY_PACKAGE_DIR}/__init__.py
            ${PY_PACKAGE_DIR}/_version.py
            ${PY_PACKAGE_DIR}/amdsmi_cli.py
            ${PY_PACKAGE_DIR}/amdsmi_commands.py
            ${PY_PACKAGE_DIR}/amdsmi_helpers.py
            ${PY_PACKAGE_DIR}/amdsmi_init.py
            ${PY_PACKAGE_DIR}/amdsmi_logger.py
            ${PY_PACKAGE_DIR}/amdsmi_parser.py
            ${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
            ${PY_PACKAGE_DIR}/BDF.py
            ${PY_PACKAGE_DIR}/README.md
            ${PY_PACKAGE_DIR}/Release_Notes.md)

# Install the whole staged package directory ...
install(
    DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${PY_PACKAGE_DIR}
    DESTINATION ${PY_CLI_INSTALL_DIR}
    COMPONENT dev)

# ... and install the entry script a second time with PROGRAMS so that it
# receives executable permissions.
install(
    PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${PY_PACKAGE_DIR}/amdsmi_cli.py
    DESTINATION ${PY_CLI_INSTALL_DIR}/${PY_PACKAGE_DIR}
    COMPONENT dev)

file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)

# symlink amdsmi_cli.py to amd-smi
# The link target is relative (../<install dir>/...), i.e. it is resolved from
# the directory the symlink ends up in after installation.
# NOTE(review): the directory created above and the BYPRODUCTS/install paths
# use a literal `bin`, while the symlink itself is created under
# ${CMAKE_INSTALL_BINDIR}; the two only coincide when CMAKE_INSTALL_BINDIR is
# `bin` - confirm.
add_custom_target(
    link_amdsmi_cli ALL
    DEPENDS amdsmi_cli
    BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/bin/amd-smi
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMAND ${CMAKE_COMMAND} -E create_symlink ../${PY_CLI_INSTALL_DIR}/${PY_PACKAGE_DIR}/amdsmi_cli.py
            ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/amd-smi)

install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/bin/amd-smi
    DESTINATION ${CMAKE_INSTALL_BINDIR}
    COMPONENT dev)
|
||||
@@ -0,0 +1,28 @@
|
||||
# AMD SMI CLI tool
|
||||
|
||||
A command-line tool for manipulating and monitoring the `amdgpu` kernel driver;
|
||||
`amd-smi` is intended to replace and deprecate the existing
|
||||
[`rocm-smi`](https://github.com/rocm/rocm_smi_lib) CLI tool.
|
||||
|
||||
When using the CLI tool, you should have at least one AMD GPU and the driver
|
||||
installed.
|
||||
|
||||
>[!NOTE]
|
||||
>The AMD SMI CLI tool is provided as an example code to aid the development of
|
||||
>telemetry tools. The Python or C++ library is recommended as a robust data
|
||||
>source.
|
||||
|
||||
Find the documentation in the `docs/` directory.
|
||||
|
||||
- [Install AMD SMI](../docs/install/install.md)
|
||||
- [About the tool and how to get started](../docs/how-to/amdsmi-cli-tool.md)
|
||||
|
||||
## Online documentation
|
||||
|
||||
Explore the latest documentation on the [ROCm documentation
|
||||
portal](https://rocm.docs.amd.com/projects/en/latest/index.html).
|
||||
|
||||
- [Install AMD SMI](https://rocm.docs.amd.com/projects/amdsmi/en/latest/install/install.html)
|
||||
|
||||
- [CLI tool usage](https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/amdsmi-cli-tool.html)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
# Release Notes
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation for AMDSMI-CLI is available after installation in `/opt/<rocm_instance>/libexec/amdsmi_cli/README.md`.
|
||||
|
||||
## AMDSMI-CLI 23.3.1.0
|
||||
|
||||
- not all ecc fields are currently supported
|
||||
- RHEL 8 & SLES 15 may have extra install steps
|
||||
|
||||
## AMDSMI-CLI 23.0.1.1
|
||||
|
||||
### Known Issues
|
||||
|
||||
- not all ecc fields are currently supported
|
||||
- RHEL 8 & SLES 15 have extra install steps
|
||||
|
||||
## AMDSMI-CLI 23.0.1.0
|
||||
|
||||
### Known Issues
|
||||
|
||||
- not all ecc fields are currently supported
|
||||
- RHEL 8 & SLES 15 have extra install steps
|
||||
|
||||
## AMDSMI-CLI 23.0.0.4
|
||||
|
||||
### Added
|
||||
|
||||
- AMDSMI-CLI tool enabled for Linux Baremetal & Guest
|
||||
- Added CSV & Watch modifier
|
||||
- Added topology subcommand
|
||||
|
||||
### Known Issues
|
||||
|
||||
- not all ecc fields are currently supported
|
||||
- RHEL 8 & SLES 15 have extra install steps
|
||||
|
||||
## AMDSMI-CLI 0.0.2
|
||||
|
||||
### Added
|
||||
|
||||
- AMDSMI-CLI tool enabled for Linux Baremetal & Guest
|
||||
|
||||
### Known Issues
|
||||
|
||||
- ecc & ras subcommands will report N/A even if RAS is enabled
|
||||
- process vram_mem's unit is listed as percentage vs bytes
|
||||
- csv modifier does not work
|
||||
- topology information is not yet enabled
|
||||
- watch modifier not fully enabled
|
||||
- limited guest support
|
||||
@@ -0,0 +1 @@
|
||||
from _version import __version__
|
||||
@@ -0,0 +1 @@
|
||||
# Template line: the @amd_smi_lib_VERSION_STRING@ placeholder is replaced by
# CMake's configure_file() when _version.py is generated from this file.
__version__ = "@amd_smi_lib_VERSION_STRING@"
|
||||
Executable
+224
@@ -0,0 +1,224 @@
|
||||
#!/usr/bin/env python3
|
||||
# PYTHON_ARGCOMPLETE_OK
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
try:
|
||||
import argcomplete
|
||||
except ImportError as e:
|
||||
logging.debug(f"Unhandled import error: {e}")
|
||||
logging.debug("argcomplete module not found. Autocomplete will not work.")
|
||||
|
||||
# from typing import TYPE_CHECKING
|
||||
# # only used for type checking
|
||||
# # pyright trips up and cannot find amdsmi scripts without it
|
||||
# if TYPE_CHECKING:
|
||||
# from amdsmi_commands import AMDSMICommands
|
||||
# from amdsmi_parser import AMDSMIParser
|
||||
# from amdsmi_logger import AMDSMILogger
|
||||
# import amdsmi_cli_exceptions
|
||||
# from amdsmi import amdsmi_interface
|
||||
# from amdsmi import amdsmi_exception
|
||||
|
||||
# Set the environment variable for GPU metrics cache duration
# setdefault() keeps a value that is already present in the environment and only
# stores "100" when the variable is unset; either way the effective value is returned.
# NOTE(review): these defaults are set ahead of the amdsmi_* imports below,
# presumably so the library sees them when it is loaded - confirm.
gpu_metrics_cache_ms = os.environ.setdefault("AMDSMI_GPU_METRICS_CACHE_MS", "100")
logging.debug("AMDSMI_GPU_METRICS_CACHE_MS = %sms", gpu_metrics_cache_ms)

# Set the environment variable for ASIC cache duration
asic_info_cache_ms = os.environ.setdefault("AMDSMI_ASIC_INFO_CACHE_MS", "10000") # 10 seconds
logging.debug("AMDSMI_ASIC_INFO_CACHE_MS = %sms", asic_info_cache_ms)
|
||||
|
||||
try:
|
||||
from amdsmi_init import *
|
||||
from amdsmi_helpers import AMDSMIHelpers
|
||||
from amdsmi_commands import AMDSMICommands
|
||||
from amdsmi_parser import AMDSMIParser
|
||||
from amdsmi_logger import AMDSMILogger
|
||||
import amdsmi_cli_exceptions
|
||||
except ImportError:
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
cli_files_path = f"{current_path}/../libexec/amdsmi_cli"
|
||||
sys.path.append(cli_files_path)
|
||||
try:
|
||||
from amdsmi_init import *
|
||||
from amdsmi_helpers import AMDSMIHelpers
|
||||
from amdsmi_commands import AMDSMICommands
|
||||
from amdsmi_parser import AMDSMIParser
|
||||
from amdsmi_logger import AMDSMILogger
|
||||
import amdsmi_cli_exceptions
|
||||
except ImportError as e:
|
||||
print(f"Unhandled import error: {e}")
|
||||
print(f"Unable to import amdsmi_cli files. Check {cli_files_path} if they are present.")
|
||||
sys.exit(1)
|
||||
|
||||
def _print_error(e, destination):
|
||||
if destination in ['stdout', 'json', 'csv']:
|
||||
print(e)
|
||||
else:
|
||||
f = open(destination, "w", encoding="utf-8")
|
||||
f.write(e)
|
||||
f.close()
|
||||
print("Error occurred. Result written to " + str(destination) + " file")
|
||||
|
||||
def configure_logging_and_execute(args, amd_smi_commands):
    """
    Reset the root logger according to ``args.loglevel`` and run ``args.func(args)``.

    CLI and library exceptions raised by the subcommand are not propagated;
    they are rendered through ``_print_error`` to the logger's destination.

    Args:
        args: Parsed command-line arguments; ``loglevel`` and ``func`` are read.
        amd_smi_commands: Instance of AMDSMICommands; its ``logger`` supplies
            the output destination and format used for error reporting.
    """
    # Drop whatever handlers are currently attached to the root logger
    # (iterate over a copy, since removal mutates the list).
    for stale_handler in list(logging.root.handlers):
        logging.root.removeHandler(stale_handler)

    # Debug logs of the AMD SMI library itself are controlled separately:
    #   RSMI_LOGGING = 1 -> log to files
    #   RSMI_LOGGING = 2 -> log to stdout
    #   RSMI_LOGGING = 3 -> log to stdout and files
    #   RSMI_LOGGING = 0 -> logging disabled
    # Files will be located in /var/log/amd_smi_lib/AMD-SMI-lib.log*

    level_by_name = {
        'DEBUG': logging.DEBUG,
        'INFO': logging.INFO,
        'WARNING': logging.WARNING,
        'ERROR': logging.ERROR,
        'CRITICAL': logging.CRITICAL
    }

    # Record layout:
    #   loglevel | YYYY-MM-DD HH:MM:SS.ms | filename:line | message
    timestamp = '%(asctime)s.%(msecs)03d'
    record_format = '%(levelname)s | ' + timestamp + ' | %(filename)s:%(lineno)d | %(message)s'
    logging.basicConfig(format=record_format,
                        level=level_by_name[args.loglevel],
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Tracebacks are only shown when debugging
    sys.tracebacklimit = 10 if args.loglevel == "DEBUG" else -1

    logging.debug(args)

    # Run the selected subcommand
    try:
        args.func(args)
    except amdsmi_cli_exceptions.AmdSmiException as cli_error:
        _print_error(str(cli_error), amd_smi_commands.logger.destination)
    except amdsmi_exception.AmdSmiLibraryException as lib_error:
        wrapped = amdsmi_cli_exceptions.AmdSmiLibraryErrorException(amd_smi_commands.logger.format,
                                                                    lib_error.get_error_code())
        _print_error(str(wrapped), amd_smi_commands.logger.destination)
|
||||
|
||||
|
||||
# Script entry point: build the command/parser objects, normalise the case of
# the command line, parse it and run the selected subcommand.
if __name__ == "__main__":
    # Disable traceback before possible init errors in AMDSMICommands and AMDSMIParser
    # This is a substring test on the upper-cased, stringified argv, so any
    # argument containing "debug" (in any case) enables tracebacks.
    copy_argv = str(sys.argv.copy()).upper()
    if "DEBUG" in copy_argv:
        sys.tracebacklimit = 10
    else:
        sys.tracebacklimit = -1

    amd_smi_helpers = AMDSMIHelpers()
    amd_smi_commands = AMDSMICommands(helpers=amd_smi_helpers)
    # One handler per subcommand is handed to the parser, positionally.
    amd_smi_parser = AMDSMIParser(amd_smi_commands.version,
                                  amd_smi_commands.list,
                                  amd_smi_commands.static,
                                  amd_smi_commands.firmware,
                                  amd_smi_commands.bad_pages,
                                  amd_smi_commands.metric,
                                  amd_smi_commands.process,
                                  amd_smi_commands.profile,
                                  amd_smi_commands.event,
                                  amd_smi_commands.topology,
                                  amd_smi_commands.set_value,
                                  amd_smi_commands.reset,
                                  amd_smi_commands.monitor,
                                  amd_smi_commands.xgmi,
                                  amd_smi_commands.partition,
                                  amd_smi_commands.ras,
                                  amd_smi_commands.node,
                                  amd_smi_commands.default,
                                  sys_argv=sys.argv,
                                  helpers=amd_smi_helpers)
    try:
        argcomplete.autocomplete(amd_smi_parser)
    except NameError:
        # The name `argcomplete` is undefined when its import at the top of the
        # file failed; autocompletion is then skipped.
        logging.debug("argcomplete module not found. Autocomplete will not work.")

    # Store possible subcommands & aliases for later errors
    # NOTE(review): `+=` extends the object returned by possible_commands in
    # place if it is a list - confirm the parser does not rely on its contents.
    valid_commands = amd_smi_parser.possible_commands
    valid_commands += ['--help', '-h']

    # Convert arguments to lowercase, but preserve case for folder path values
    processed_argv = []
    # Arguments that should preserve case
    case_sensitive_args = ['--folder', '--file', '--gpu', '--cpu', '--core', '--profile', '--cper-file']
    case_sensitive_prefixes = ['--folder=', '--file=', '--gpu=', '--cpu=', '--core=', '--profile=', '--cper-file=']

    # NOTE(review): flags are matched against these lists before lower-casing,
    # so only the exact lower-case spellings protect their value; e.g.
    # `--FILE Path` falls through to the generic branch and `Path` is
    # lower-cased as well. sys.argv[0] also passes through this loop.
    preserve_case_for_next = False
    for i, arg in enumerate(sys.argv):
        if preserve_case_for_next:
            # Preserve case for the next argument value
            processed_argv.append(arg)
            preserve_case_for_next = False
        elif arg in case_sensitive_args:
            # Convert flag to lowercase but preserve next value
            processed_argv.append(arg.lower())
            preserve_case_for_next = True
        elif any(arg.startswith(prefix) for prefix in case_sensitive_prefixes):
            # Handle --arg=value format, preserve case for the value part
            for prefix in case_sensitive_prefixes:
                if arg.startswith(prefix):
                    flag = prefix.rstrip('=')
                    value = arg[len(prefix):]
                    processed_argv.append(flag.lower() + '=' + value)
                    break
        elif arg.startswith('--') or not arg.startswith('-'):
            # Convert other long options and positional arguments to lowercase
            processed_argv.append(arg.lower())
        else:
            # Preserve case for short options
            processed_argv.append(arg)
    sys.argv = processed_argv

    # Choose what to parse:
    #   no arguments                   -> the 'default' subcommand
    #   leading --loglevel, debug seen -> 'default' with the given log level
    #   known subcommand / help flag   -> the command line as given
    #   anything else                  -> invalid subcommand error
    if len(sys.argv) == 1:
        args = amd_smi_parser.parse_args(args=['default'])
    elif sys.tracebacklimit == 10 and (sys.argv[1] == '--loglevel'):
        args = amd_smi_parser.parse_args(args=['default', '--loglevel'] + sys.argv[2:])
    elif sys.argv[1] in valid_commands:
        args = amd_smi_parser.parse_args(args=None)
    else:
        # NOTE(review): the second argument here is logger.destination, while
        # the exception's parameter is named `outputformat` - confirm intended.
        raise amdsmi_cli_exceptions.AmdSmiInvalidSubcommandException(sys.argv[1],amd_smi_commands.logger.destination)

    # Handle command modifiers before subcommand execution
    # human readable is the default output format
    # When both modifiers are set, csv is applied last and therefore wins.
    if hasattr(args, 'json') and args.json:
        amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.json.value
    if hasattr(args, 'csv') and args.csv:
        amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.csv.value
    if hasattr(args, 'file') and args.file:
        amd_smi_commands.logger.destination = args.file
    configure_logging_and_execute(args, amd_smi_commands)
|
||||
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import json
|
||||
|
||||
|
||||
# Human-readable description for each status code. Keys are the non-negative
# magnitude of the code; _get_error_message() looks codes up by abs(error_code).
AMDSMI_ERROR_MESSAGES = {
    0: "Success",
    1: "Invalid parameters",
    2: "Command not supported",
    3: "Command not yet implemented",
    4: "Failed load module",
    5: "Failed load symbol",
    6: "Drm error",
    7: "API call failed",
    8: "Timeout in API call",
    9: "Retry operation",
    10: "Permission Denied",
    11: "Interrupt occurred during execution",
    12: "I/O Error",
    13: "Address fault",
    14: "Error opening file",
    15: "Not enough memory",
    16: "Internal error",
    17: "Out of bounds",
    18: "Initialization error",
    19: "Internal reference counter exceeded",
    # Reserved for future error messages
    30: "Device busy",
    31: "Device Not found",
    32: "Device not initialized",
    33: "No more free slot",
    34: "Driver not loaded",
    # Reserved for future error messages
    40: "No data was found for given input",
    41: "Insufficient size for operation",
    42: "Unexpected size of data was read",
    43: "The data read or provided was unexpected",
    44: "System has different cpu than AMD",
    45: "Energy driver not found",
    46: "MSR driver not found",
    47: "HSMP driver not found",
    48: "HSMP not supported",
    49: "HSMP message/feature not supported",
    50: "HSMP message timed out",
    51: "No Energy and HSMP driver present",
    52: "File or directory not found",
    53: "Parsed argument is invalid",
    54: "AMDGPU restart error",
    55: "Setting is not available",
    0xFFFFFFFE: "AMD-SMI Library error did not map to a status code",
    0xFFFFFFFF: "Unknown error"
}
|
||||
|
||||
def _get_error_message(error_code):
    """Return the description of a status code, ignoring its sign.

    Args:
        error_code: Numeric status code; negative and positive values map to
            the same entry.

    Returns:
        The text from AMDSMI_ERROR_MESSAGES, or "Generic error" when the code
        has no entry.
    """
    return AMDSMI_ERROR_MESSAGES.get(abs(error_code), "Generic error")
|
||||
|
||||
|
||||
class AmdSmiException(Exception):
|
||||
def __init__(self):
|
||||
self.json_message = {}
|
||||
self.csv_message = ''
|
||||
self.stdout_message = ''
|
||||
self.message = ''
|
||||
self.output_format = ''
|
||||
self.device_type = ''
|
||||
|
||||
def __str__(self):
|
||||
# Return message according to the current output format
|
||||
if self.output_format == 'json':
|
||||
self.message = json.dumps(self.json_message)
|
||||
elif self.output_format == 'csv':
|
||||
self.message = self.csv_message
|
||||
else:
|
||||
self.message = self.stdout_message
|
||||
|
||||
return self.message
|
||||
|
||||
|
||||
class AmdSmiInvalidCommandException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str, message=None):
|
||||
super().__init__()
|
||||
self.value = -1
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Command '{self.command}' is invalid. Run 'amd-smi -h' for more info."
|
||||
|
||||
if message:
|
||||
common_message = message
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiInvalidParameterException(AmdSmiException):
|
||||
def __init__(self, command, arg, outputformat: str):
|
||||
super().__init__()
|
||||
self.value = -2
|
||||
self.command = command
|
||||
self.arg = arg
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Parameter '{self.arg}' is invalid. Run 'amd-smi {self.command} -h' for more info."
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiDeviceNotFoundException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str, gpu: bool, cpu: bool, core: bool):
|
||||
super().__init__()
|
||||
self.value = -3
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
# Handle different devices
|
||||
self.device_type = ""
|
||||
if gpu:
|
||||
self.device_type = "GPU"
|
||||
elif cpu:
|
||||
self.device_type = "CPU"
|
||||
elif core:
|
||||
self.device_type = "CPU CORE"
|
||||
|
||||
common_message = f"Can not find a device: {self.device_type} '{self.command}'"
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiInvalidFilePathException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str, message=None):
|
||||
super().__init__()
|
||||
self.value = -4
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Path '{self.command}' cannot be found."
|
||||
|
||||
if message:
|
||||
common_message = message
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiInvalidParameterValueException(AmdSmiException):
|
||||
def __init__(self, command, arg, outputformat: str):
|
||||
super().__init__()
|
||||
self.value = -5
|
||||
self.command = command
|
||||
self.arg = arg
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Value '{self.arg}' is not of valid type or format. Run 'amd-smi {self.command} -h' for more info."
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiMissingParameterValueException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str):
|
||||
super().__init__()
|
||||
self.value = -6
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Parameter '{self.command}' requires a value. Run '--help' for more info."
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiCommandNotSupportedException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str):
|
||||
super().__init__()
|
||||
self.value = -7
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Command '{self.command}' is not supported on the system. Run '--help' for more info."
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiParameterNotSupportedException(AmdSmiException):
|
||||
def __init__(self, command, outputformat: str):
|
||||
super().__init__()
|
||||
self.value = -8
|
||||
self.command = command
|
||||
self.output_format = outputformat
|
||||
|
||||
common_message = f"Parameter '{self.command}' is not supported on the system. Run '--help' for more info."
|
||||
|
||||
self.json_message["error"] = common_message
|
||||
self.json_message["code"] = self.value
|
||||
self.csv_message = f"error,code\n{common_message}, {self.value}"
|
||||
self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiRequiredCommandException(AmdSmiException):
    """CLI error for a command that was invoked without a target argument (code -9).

    Args:
        command: Name of the command; used both in the message and in the suggested
            ``amd-smi <command> -h`` help invocation.
        outputformat (str): Stored unchanged on the instance as ``output_format``.

    The message is stored in three renderings: ``json_message`` ("error" and "code" keys),
    ``csv_message`` and ``stdout_message``. ``json_message`` is presumably created by
    ``AmdSmiException.__init__`` (not visible in this part of the file).
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -9
        self.command = command
        self.output_format = outputformat

        text = f"Command '{self.command}' requires a target argument. Run 'amd-smi {self.command} -h' for more info."
        code = self.value

        # One message, three renderings: JSON fields, CSV header + row, plain stdout line.
        self.json_message["error"] = text
        self.json_message["code"] = code
        self.csv_message = f"error,code\n{text}, {code}"
        self.stdout_message = f"{text} Error code: {code}"
|
||||
|
||||
|
||||
class AmdSmiInvalidSubcommandException(AmdSmiException):
    """CLI error for an invalid AMD-SMI command (code -10).

    Args:
        command: The command text that was rejected; interpolated into the message.
        outputformat (str): Stored unchanged on the instance as ``output_format``.

    The message is stored in three renderings: ``json_message`` ("error" and "code" keys),
    ``csv_message`` and ``stdout_message``. ``json_message`` is presumably created by
    ``AmdSmiException.__init__`` (not visible in this part of the file).
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -10
        self.command = command
        self.output_format = outputformat

        text = f"AMD-SMI Command '{self.command}' is invalid. Must receive valid AMD-SMI Command first. Run 'amd-smi -h' for more info."
        code = self.value

        # One message, three renderings: JSON fields, CSV header + row, plain stdout line.
        self.json_message["error"] = text
        self.json_message["code"] = code
        self.csv_message = f"error,code\n{text}, {code}"
        self.stdout_message = f"{text} Error code: {code}"
|
||||
|
||||
|
||||
class AmdSmiPermissionDeniedException(AmdSmiException):
    """CLI error for a command that requires elevated (sudo) privileges (code -11).

    Args:
        command: Name of the command that needs elevation; interpolated into the message.
        outputformat (str): Stored unchanged on the instance as ``output_format``.

    The message is stored in three renderings: ``json_message`` ("error" and "code" keys),
    ``csv_message`` and ``stdout_message``. ``json_message`` is presumably created by
    ``AmdSmiException.__init__`` (not visible in this part of the file).
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -11
        self.command = command
        self.output_format = outputformat

        text = f"AMD-SMI Command '{self.command}' requires elevation (sudo privileges required)"
        code = self.value

        # One message, three renderings: JSON fields, CSV header + row, plain stdout line.
        self.json_message["error"] = text
        self.json_message["code"] = code
        self.csv_message = f"error,code\n{text}, {code}"
        self.stdout_message = f"{text} Error code: {code}"
|
||||
|
||||
|
||||
class AmdSmiUnknownErrorException(AmdSmiException):
    """Catch-all CLI error for an unknown failure (code -100).

    Args:
        command: Stored on the instance as ``command``; not used in the message text.
        outputformat (str): Stored unchanged on the instance as ``output_format``.

    The message is stored in three renderings: ``json_message`` ("error" and "code" keys),
    ``csv_message`` and ``stdout_message``. ``json_message`` is presumably created by
    ``AmdSmiException.__init__`` (not visible in this part of the file).
    """

    def __init__(self, command, outputformat: str):
        super().__init__()
        self.value = -100
        self.command = command
        self.output_format = outputformat

        text = "An unknown error has occurred. Run 'help' for more info."
        code = self.value

        # One message, three renderings: JSON fields, CSV header + row, plain stdout line.
        self.json_message["error"] = text
        self.json_message["code"] = code
        self.csv_message = f"error,code\n{text}, {code}"
        self.stdout_message = f"{text} Error code: {code}"
|
||||
|
||||
|
||||
class AmdSmiLibraryErrorException(AmdSmiException):
    """CLI error wrapping a status code returned by the AMD SMI library.

    Args:
        outputformat (str): Stored unchanged on the instance as ``output_format``.
        error_code: Numeric library status; kept as ``smilibcode``. The CLI-level code
            stored in ``value`` is ``-1000 - abs(error_code)``.

    The message is stored in three renderings: ``json_message`` ("error" and "code" keys),
    ``csv_message`` and ``stdout_message``. ``json_message`` is presumably created by
    ``AmdSmiException.__init__`` (not visible in this part of the file).
    """

    def __init__(self, outputformat: str, error_code):
        super().__init__()
        self.value = -1000 - abs(error_code)
        self.smilibcode = error_code
        self.output_format = outputformat

        # A status code missing from AMDSMI_ERROR_MESSAGES must not raise while this
        # exception is being built: that would replace the library error being reported
        # with an unrelated lookup error. LookupError covers both mapping and sequence lookups.
        try:
            description = AMDSMI_ERROR_MESSAGES[abs(self.smilibcode)]
        except LookupError:
            description = f"Unknown AMDSMI status code {self.smilibcode}"

        common_message = f"AMDSMI has returned error '{self.value}' - '{description}'"

        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
File diff ditekan karena terlalu besar
Load Diff
Executable
+1934
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
### Handle safe initialization for amdsmi
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
python_lib_path = f"{current_path}/../../share/amd_smi"
|
||||
sys.path.append(python_lib_path)
|
||||
# If the python library is installed, it will overwrite the path above
|
||||
|
||||
try:
|
||||
from amdsmi import amdsmi_interface, amdsmi_exception
|
||||
except ImportError as e:
|
||||
print(f"Unhandled import error: {e}")
|
||||
print("Failed to import the amdsmi Python library. Ensure it is installed in Python.")
|
||||
print(f"Alternatively, verify that the library is in the path:\n{python_lib_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Using basic python logging for user errors and development
|
||||
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.ERROR) # User level logging
|
||||
# This traceback limit only affects this file; once the code hits the CLI portion it gets reset to the user's preference
|
||||
sys.tracebacklimit = -1 # Disable traceback when raising errors
|
||||
|
||||
# On initial import set initialized variable
|
||||
AMDSMI_INITIALIZED = False
|
||||
AMDSMI_INIT_FLAG = amdsmi_interface.AmdSmiInitFlags.INIT_ALL_PROCESSORS
|
||||
AMD_VENDOR_ID = 4098
|
||||
|
||||
def check_amdgpu_driver():
    """Report whether the amdgpu kernel module is live.

    Returns:
        bool: True only when ``/sys/module/amdgpu/initstate`` exists and its contents,
        stripped of surrounding whitespace, equal ``"live"``; False otherwise.
    """
    initstate = Path("/sys/module/amdgpu/initstate")
    return initstate.exists() and initstate.read_text(encoding="ascii").strip() == "live"
|
||||
|
||||
|
||||
def check_amd_hsmp_driver():
    """Report whether the HSMP device node is present.

    The amd_hsmp / hsmp_acpi drivers are detected indirectly: this only tests for the
    existence of ``/dev/hsmp`` and does not inspect the loaded-module list.

    Returns:
        bool: True when ``/dev/hsmp`` exists, False otherwise.
    """
    return Path("/dev/hsmp").exists()
|
||||
|
||||
|
||||
def _init_amdsmi_or_exit(init_flag, not_loaded_message):
    """Call ``amdsmi_init(init_flag)``; exit the process if the library reports missing drivers.

    Any other AmdSmiLibraryException / AmdSmiParameterException is re-raised unchanged.
    """
    try:
        amdsmi_interface.amdsmi_init(init_flag)
    except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
        # getattr: whether both exception types carry ``err_code`` is not visible from this
        # file; a missing attribute must not turn into an AttributeError that hides ``e``.
        if getattr(e, "err_code", None) in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                                            amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
            logging.error(not_loaded_message)
            sys.exit(-1)
        raise


def amdsmi_cli_init():
    """ Initializes AMDSMI Library for the CLI

    Probes for the amdgpu driver (check_amdgpu_driver) and the HSMP device
    (check_amd_hsmp_driver) once each, then initializes the AMD SMI library with the flag
    matching what was found:

        both present  -> INIT_AMD_APUS
        amdgpu only   -> INIT_AMD_GPUS
        HSMP only     -> INIT_AMD_CPUS
        neither       -> amdsmi_init is NOT called; INIT_ALL_PROCESSORS is returned

    Return:
        init_flag: the flag selected as described above

    Raises:
        AmdSmiLibraryException / AmdSmiParameterException: re-raised from amdsmi_init when
            the failure is not a "not initialized" / "driver not loaded" status.
        SystemExit: exit status -1 when amdsmi_init reports AMDSMI_STATUS_NOT_INIT or
            AMDSMI_STATUS_DRIVER_NOT_LOADED (an error is logged first).
    """
    gpu_live = check_amdgpu_driver()
    hsmp_live = check_amd_hsmp_driver()

    init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_ALL_PROCESSORS
    if gpu_live and hsmp_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_APUS
        logging.debug("Both amdgpu , amd_hsmp or hsmp_acpi driver's initstate is live")
        _init_amdsmi_or_exit(
            init_flag,
            "Drivers not loaded (amdgpu, amd_hsmp or hsmp_acpi drivers not found in modules)")
    elif gpu_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS
        logging.debug("amdgpu driver initstate is live")
        _init_amdsmi_or_exit(init_flag, "Driver not loaded (amdgpu not found in modules)")
        logging.debug("amdgpu driver initialized successfully, but amd_hsmp or hsmp_acpi initstate was not live")
    elif hsmp_live:
        init_flag = amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS
        logging.debug("amd_hsmp or hsmp_acpi driver initstate is live")
        _init_amdsmi_or_exit(init_flag, "Driver not loaded (amd_hsmp or hsmp_acpi not found in modules)")
        logging.debug("amd_hsmp or hsmp_acpi driver initialized successfully, but amdgpu initstate was not live")

    # NOTE(review): this line is also reached when no driver was detected and amdsmi_init
    # was never called; confirm that path is intentional.
    logging.debug(f"AMDSMI initialized with atleast one driver successfully | init flag: {init_flag}")

    return init_flag
|
||||
|
||||
def amdsmi_cli_shutdown():
    """Shut down the AMD SMI library instance.

    Calls ``amdsmi_interface.amdsmi_shut_down()``. On failure an error is logged and the
    exception is propagated to the caller.

    Raises:
        AmdSmiLibraryException: if the library shutdown call fails.
    """
    try:
        amdsmi_interface.amdsmi_shut_down()
    except amdsmi_exception.AmdSmiLibraryException as shutdown_error:
        logging.error("Unable to cleanly shut down amd-smi-lib")
        raise shutdown_error
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
    """Exit the interpreter with status 0 when a handled signal arrives.

    Args:
        sig: Signal number passed by the ``signal`` module; only logged.
        frame: Current stack frame passed by the ``signal`` module; unused.

    Raises:
        SystemExit: always, with exit code 0.

    The previous ``try/except Exception`` wrapper with an ``os._exit(0)`` fallback was
    removed because it could never run: ``sys.exit`` raises ``SystemExit``, which derives
    from ``BaseException`` and is therefore not caught by ``except Exception``.
    """
    logging.debug(f"Handling signal: {sig}")
    sys.exit(0)
|
||||
|
||||
# Import-time bootstrap: initialize the library once, route SIGINT/SIGTERM through
# signal_handler, and register amdsmi_cli_shutdown to run at interpreter exit.
if not AMDSMI_INITIALIZED:
    AMDSMI_INIT_FLAG = amdsmi_cli_init()
    AMDSMI_INITIALIZED = True
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    atexit.register(amdsmi_cli_shutdown)
|
||||
File diff ditekan karena terlalu besar
Load Diff
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,143 @@
|
||||
# This module provides common functions used for building
|
||||
# and packaging ROCm projects
|
||||
|
||||
option(CMAKE_VERBOSE_MAKEFILE "Enable verbose output" ON)
|
||||
option(CMAKE_EXPORT_COMPILE_COMMANDS "Export compile commands for linters and autocompleters" ON)
|
||||
|
||||
# Sets ROCm-related defaults: ROCM_DIR and the install/packaging prefixes (cache entries),
# plus LIB_RUNPATH, CMAKE_PREFIX_PATH and CMAKE_LIBRARY_PATH in the caller's scope.
function(generic_add_rocm)
    set(ROCM_DIR "/opt/rocm" CACHE STRING "ROCm directory.")

    # Default the runpath only when ROCM_RPATH is set in the environment and the
    # caller has not already defined LIB_RUNPATH.
    if(DEFINED ENV{ROCM_RPATH} AND NOT DEFINED LIB_RUNPATH)
        set(LIB_RUNPATH "\$ORIGIN:\$ORIGIN/../lib:\$ORIGIN/../lib64" PARENT_SCOPE)
    endif()

    # Install and packaging prefixes both default to the ROCm directory.
    set(CMAKE_INSTALL_PREFIX ${ROCM_DIR} CACHE STRING "Default installation directory.")
    set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Default packaging prefix.")

    # Extend the caller's package and library search paths.
    set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} /usr/local PARENT_SCOPE)
    set(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} /usr/lib64 /usr/lib/x86_64-linux-gnu PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Configures compiler/linker flags and CPack defaults for the calling project.
#
# Results are delivered to the caller through PARENT_SCOPE variables and cache entries.
#
# set(<var> ... PARENT_SCOPE) leaves <var> unchanged inside this function. Flags are
# therefore accumulated in local variables and pushed to the caller exactly once;
# chaining several `set(X "${X} ..." PARENT_SCOPE)` calls would make each call
# overwrite what the previous one had handed to the caller.
function(generic_package)
    # Used by test and example CMakeLists
    set(SHARE_INSTALL_PREFIX "share/${CMAKE_PROJECT_NAME}" CACHE STRING "Tests and Example install directory")

    if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)
        message("Compiler version is " ${CMAKE_CXX_COMPILER_VERSION})
        message(FATAL_ERROR "Require at least gcc-5.4.0")
    endif()

    # Local accumulators; CMAKE_* themselves are left untouched in this scope so that
    # compiler checks below see the same flags as before.
    set(_cxx_flags "${CMAKE_CXX_FLAGS}")
    set(_exe_ldflags "${CMAKE_EXE_LINKER_FLAGS}")
    set(_shared_ldflags "${CMAKE_SHARED_LINKER_FLAGS}")

    if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
        set(_cxx_flags "${_cxx_flags} -O2")
    else()
        set(_cxx_flags "${_cxx_flags} -ggdb -O0 -DDEBUG")
    endif()

    # Add address sanitizer
    # derived from:
    # https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime/blob/e176056061bf11fdd98b58dd57deb4ac5625844d/amdocl/CMakeLists.txt#L27
    if(${ADDRESS_SANITIZER})
        set(ASAN_COMPILER_FLAGS "-fno-omit-frame-pointer -fsanitize=address")
        set(ASAN_LINKER_FLAGS "-fsanitize=address")

        if(BUILD_SHARED_LIBS)
            # Clang-specific flag for shared ASAN library
            if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
                set(ASAN_COMPILER_FLAGS "${ASAN_COMPILER_FLAGS} -shared-libsan")
                set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -shared-libsan")
            endif()
        else()
            set(ASAN_LINKER_FLAGS "${ASAN_LINKER_FLAGS} -static-libsan")
        endif()
        # C flags are only touched here, so a single direct PARENT_SCOPE set is safe.
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ASAN_COMPILER_FLAGS}" PARENT_SCOPE)
        set(_cxx_flags "${_cxx_flags} ${ASAN_COMPILER_FLAGS}")
        set(_exe_ldflags "${_exe_ldflags} ${ASAN_LINKER_FLAGS}")
        set(_shared_ldflags "${_shared_ldflags} ${ASAN_LINKER_FLAGS}")
    else()
        ## Security breach mitigation flags
        # NOTE(review): the glibc hardening macro is spelled _FORTIFY_SOURCE; as written,
        # -DFORTIFY_SOURCE=2 only defines an unrelated macro. Left unchanged because
        # enabling _FORTIFY_SOURCE warns in unoptimized (-O0) builds - confirm and fix.
        set(_cxx_flags "${_cxx_flags} -DFORTIFY_SOURCE=2 -fstack-protector-all -Wcast-align")
        ## More security breach mitigation flags
        set(HARDENING_LDFLAGS "${HARDENING_LDFLAGS} -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
        set(_exe_ldflags "${_exe_ldflags} ${HARDENING_LDFLAGS}")
        set(_shared_ldflags "${_shared_ldflags} ${HARDENING_LDFLAGS}")

        include(CheckCXXCompilerFlag)
        check_cxx_compiler_flag("-Wtrampolines" CXX_SUPPORTS_WTRAMPOLINES)
        if(CXX_SUPPORTS_WTRAMPOLINES)
            set(_cxx_flags "${_cxx_flags} -Wtrampolines")
        endif()
    endif()

    # Clang does not set the build-id
    # similar to if(NOT CMAKE_COMPILER_IS_GNUCC)
    if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
        set(_exe_ldflags "${_exe_ldflags} -Wl,--build-id=sha1")
        set(_shared_ldflags "${_shared_ldflags} -Wl,--build-id=sha1")
    endif()

    # Hand the accumulated flags to the caller once.
    set(CMAKE_CXX_FLAGS "${_cxx_flags}" PARENT_SCOPE)
    set(CMAKE_EXE_LINKER_FLAGS "${_exe_ldflags}" PARENT_SCOPE)
    set(CMAKE_SHARED_LINKER_FLAGS "${_shared_ldflags}" PARENT_SCOPE)

    # configure packaging
    # cpack version is populated with CMAKE_PROJECT_VERSION implicitly
    set(CPACK_PACKAGE_NAME ${CMAKE_PROJECT_NAME} CACHE STRING "")
    set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." CACHE STRING "")
    set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Default packaging prefix.")
    set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" CACHE STRING "")
    set(CPACK_RPM_PACKAGE_LICENSE "MIT" CACHE STRING "")
    set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
    set(CPACK_VERBATIM_VARIABLES ON CACHE BOOL "Escape strings passed to CPACK.")
    set(CPACK_DEB_COMPONENT_INSTALL ON PARENT_SCOPE)
    set(CPACK_RPM_COMPONENT_INSTALL ON PARENT_SCOPE)
    mark_as_advanced(CPACK_PACKAGE_NAME CPACK_PACKAGE_VENDOR CPACK_PACKAGE_CONTACT CPACK_RESOURCE_FILE_LICENSE
                     CPACK_RPM_PACKAGE_LICENSE CPACK_GENERATOR)

    # Debian package specific variables
    # The value is set locally first so the message below reports what the caller receives.
    if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
        set(CPACK_DEBIAN_PACKAGE_RELEASE "$ENV{CPACK_DEBIAN_PACKAGE_RELEASE}")
    else()
        set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
    endif()
    set(CPACK_DEBIAN_PACKAGE_RELEASE "${CPACK_DEBIAN_PACKAGE_RELEASE}" PARENT_SCOPE)
    message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
    set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" PARENT_SCOPE)

    # RPM package specific variables
    if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
        set(CPACK_RPM_PACKAGE_RELEASE "$ENV{CPACK_RPM_PACKAGE_RELEASE}")
    else()
        set(CPACK_RPM_PACKAGE_RELEASE "local")
    endif()
    set(CPACK_RPM_PACKAGE_RELEASE "${CPACK_RPM_PACKAGE_RELEASE}" PARENT_SCOPE)
    message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
    set(CPACK_RPM_FILE_NAME "RPM-DEFAULT" PARENT_SCOPE)
    set(CPACK_RPM_PACKAGE_AUTOREQ 0 PARENT_SCOPE)
    set(CPACK_RPM_PACKAGE_AUTOPROV 1 PARENT_SCOPE)
    list(
        APPEND
        CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
        "/lib"
        "/usr/sbin"
        "/lib/systemd"
        "/lib/systemd/system"
        "/usr"
        "/opt")
    # list(APPEND) only changes the local copy; publish it so the caller's CPack run sees it.
    set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION}" PARENT_SCOPE)

    # PACKAGE-tests need PACKAGE
    set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
    set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)

    # Treat runtime group as package base.
    # Without it the base package would be named '<package>-runtime',
    # resulting in <package>-runtime*.deb and <package>-runtime*.rpm
    set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
    set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${CPACK_PACKAGE_NAME}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Declares the CPack component groups and components.
# Call this after 'include(CPack)'.
function(generic_package_post)
    # "runtime" group: the base PACKAGE, no postfix
    cpack_add_component_group("runtime")
    cpack_add_component(dev GROUP runtime DESCRIPTION "Development components of the library")
    cpack_add_component(unspecified GROUP runtime)
    # not quite sure why this is the only way to populate cpack description
    cpack_add_component(runtime GROUP runtime DESCRIPTION "Runtime components of the library")

    # "tests" group: the PACKAGE-tests package, -tests postfix
    cpack_add_component_group("tests")
    cpack_add_component(tests GROUP tests DESCRIPTION "Test components of the library")
endfunction()
|
||||
@@ -0,0 +1,200 @@
|
||||
################################################################################
|
||||
## Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
## this software and associated documentation files (the "Software"), to deal in
|
||||
## the Software without restriction, including without limitation the rights to
|
||||
## use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
## the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
## subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in all
|
||||
## copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
## FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
## COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
## IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
## CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
## Parses VERSION_STRING and hands the first, second and third runs of digits
## to the caller as VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH.
## VERSION_STRING is set in the caller to the components that were found,
## joined with "." (an empty string when the input contains no digits).
##
## NOTE: a component that is missing from the input is not reset; the caller
## keeps whatever VERSION_MINOR / VERSION_PATCH it already had.
function(parse_version VERSION_STRING)
    # Quoted so that an empty value or one containing ';' is still passed as a single argument.
    string(REGEX MATCHALL "[0-9]+" VERSIONS "${VERSION_STRING}")
    list(LENGTH VERSIONS VERSION_COUNT)

    # Start empty so a same-named variable visible from the calling scope cannot leak
    # into the result when no digits are found.
    set(TEMP_VERSION_STRING "")

    if(VERSION_COUNT GREATER 0)
        list(GET VERSIONS 0 MAJOR)
        set(VERSION_MAJOR ${MAJOR} PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${MAJOR}")
    endif()

    if(VERSION_COUNT GREATER 1)
        list(GET VERSIONS 1 MINOR)
        set(VERSION_MINOR ${MINOR} PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}")
    endif()

    if(VERSION_COUNT GREATER 2)
        list(GET VERSIONS 2 PATCH)
        set(VERSION_PATCH ${PATCH} PARENT_SCOPE)
        set(TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}")
    endif()

    set(VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Reads the number that follows "AMDSMI_LIB_VERSION_<ITEM>" in REL_FILE_PATH
# (relative to CMAKE_CURRENT_SOURCE_DIR) and stores it in the caller's variable
# named <ITEM>. The result is "0" when the file or the pattern is not found.
function(get_version_from_file REL_FILE_PATH ITEM)
    set(version_file "${CMAKE_CURRENT_SOURCE_DIR}/${REL_FILE_PATH}")
    set(item_value "0")

    if(EXISTS "${version_file}")
        file(READ ${version_file} file_text)
        string(REGEX MATCHALL "AMDSMI_LIB_VERSION_${ITEM} *[0-9]+" hits "${file_text}")
        list(LENGTH hits hit_count)
        if(${hit_count} GREATER 0)
            # Take the first run of digits found in the match list.
            string(REGEX MATCH "[0-9]+" item_value "${hits}")
        endif()
    endif()

    set(${ITEM} "${item_value}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Compares DEFAULT_VERSION with the MAJOR.MINOR.RELEASE version currently in
# REL_FILE_PATH. When DEFAULT_VERSION is greater, the file (if it exists) is rewritten:
# every "<PAT1><FIELD><PAT2> *<digits>" becomes "<PAT1><FIELD><PAT3><new value>" for
# FIELD in MAJOR, MINOR, RELEASE, and the caller's VERSION_STRING is set to
# DEFAULT_VERSION. Otherwise VERSION_STRING is set to the version found in the file.
function(update_version_in_file REL_FILE_PATH DEFAULT_VERSION PAT1 PAT2 PAT3)
    # Each call sets MAJOR / MINOR / RELEASE in this scope.
    get_version_from_file(${REL_FILE_PATH} "MAJOR")
    get_version_from_file(${REL_FILE_PATH} "MINOR")
    get_version_from_file(${REL_FILE_PATH} "RELEASE")
    set(current_version "${MAJOR}.${MINOR}.${RELEASE}")

    if(DEFAULT_VERSION VERSION_GREATER current_version)
        set(version_file "${CMAKE_CURRENT_SOURCE_DIR}/${REL_FILE_PATH}")
        if(EXISTS "${version_file}")
            # Provides VERSION_MAJOR / VERSION_MINOR / VERSION_PATCH in this scope.
            parse_version(${DEFAULT_VERSION})
            file(READ ${version_file} contents)

            string(REGEX REPLACE "${PAT1}MAJOR${PAT2} *[0-9]*" "${PAT1}MAJOR${PAT3}${VERSION_MAJOR}" contents
                                 "${contents}")
            string(REGEX REPLACE "${PAT1}MINOR${PAT2} *[0-9]*" "${PAT1}MINOR${PAT3}${VERSION_MINOR}" contents
                                 "${contents}")
            string(REGEX REPLACE "${PAT1}RELEASE${PAT2} *[0-9]*" "${PAT1}RELEASE${PAT3}${VERSION_PATCH}" contents
                                 "${contents}")

            file(WRITE ${version_file} "${contents}")
        endif()
        set(VERSION_STRING "${DEFAULT_VERSION}" PARENT_SCOPE)
    else()
        set(VERSION_STRING "${current_version}" PARENT_SCOPE)
    endif()
endfunction()
|
||||
|
||||
## Determines the version from git tags.
## When GIT is true, the highest tag matching "<VERSION_PREFIX>*" (version sort)
## is parsed. The caller receives VERSION_STRING, VERSION_MAJOR, VERSION_MINOR
## and VERSION_PATCH from the parsed result if it is greater than
## DEFAULT_VERSION_STRING, otherwise from the default.
function(get_version_from_tag DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
    # Keep the default's components before a tag parse can overwrite VERSION_*.
    parse_version(${DEFAULT_VERSION_STRING})
    set(DEFAULT_VERSION_MAJOR "${VERSION_MAJOR}")
    set(DEFAULT_VERSION_MINOR "${VERSION_MINOR}")
    set(DEFAULT_VERSION_PATCH "${VERSION_PATCH}")

    if(GIT)
        # The two COMMANDs form a pipeline: newest tag first, keep only the first line.
        execute_process(
            COMMAND git tag --list --sort=-version:refname "${VERSION_PREFIX}*"
            COMMAND head -n 1
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            OUTPUT_VARIABLE GIT_TAG_STRING
            OUTPUT_STRIP_TRAILING_WHITESPACE RESULTS_VARIABLE RESULTS)
        if(GIT_TAG_STRING)
            parse_version(${GIT_TAG_STRING})
        endif()
    endif()

    if(VERSION_STRING VERSION_GREATER DEFAULT_VERSION_STRING)
        set(VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE)
        set(VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE)
        set(VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE)
        set(VERSION_PATCH "${VERSION_PATCH}" PARENT_SCOPE)
    else()
        set(VERSION_STRING "${DEFAULT_VERSION_STRING}" PARENT_SCOPE)
        set(VERSION_MAJOR "${DEFAULT_VERSION_MAJOR}" PARENT_SCOPE)
        set(VERSION_MINOR "${DEFAULT_VERSION_MINOR}" PARENT_SCOPE)
        set(VERSION_PATCH "${DEFAULT_VERSION_PATCH}" PARENT_SCOPE)
    endif()
endfunction()
|
||||
|
||||
# Runs cmake_modules/version_util.sh with "-c <VERSION_PREFIX>" and stores its
# output in the caller's NUM_COMMITS. NUM_COMMITS is "unknown" when the
# script cannot be found.
function(num_change_since_prev_pkg VERSION_PREFIX)
    find_program(get_commits NAMES version_util.sh PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)

    if(get_commits)
        execute_process(
            COMMAND ${get_commits} -c ${VERSION_PREFIX}
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            OUTPUT_VARIABLE NUM_COMMITS
            OUTPUT_STRIP_TRAILING_WHITESPACE
            RESULT_VARIABLE RESULT)

        # The script output is published regardless of its exit status.
        set(NUM_COMMITS "${NUM_COMMITS}" PARENT_SCOPE)

        if(${RESULT} EQUAL 0)
            message("${NUM_COMMITS} were found since previous release")
        else()
            message("Unable to determine number of commits since previous release")
        endif()
    else()
        message("WARNING: Didn't find version_util.sh")
        set(NUM_COMMITS "unknown" PARENT_SCOPE)
    endif()
endfunction()
|
||||
|
||||
# Builds the package version string
#   <version>.<num commits>-<build id>-<git hash>
# and sets PKG_VERSION_STR, PKG_VERSION_HASH and CPACK_PACKAGE_VERSION_{MAJOR,MINOR,PATCH}
# in the caller's scope.
#   <build id> is $ENV{ROCM_BUILD_ID}, or "local-build-0" when that is not defined.
#   <git hash> is the short HEAD hash (with "-dirty" appended when `git diff --quiet`
#   returns 1), or "unknown" when GIT is false or the hash cannot be read.
function(get_package_version_number DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
    parse_version(${DEFAULT_VERSION_STRING})     # sets VERSION_STRING, VERSION_MAJOR/MINOR/PATCH here
    num_change_since_prev_pkg(${VERSION_PREFIX}) # sets NUM_COMMITS here

    if(DEFINED ENV{ROCM_BUILD_ID})
        set(VERSION_ID $ENV{ROCM_BUILD_ID})
    else()
        set(VERSION_ID "local-build-0")
    endif()

    set(PKG_VERSION_STR "${VERSION_STRING}.${NUM_COMMITS}-${VERSION_ID}")

    if(GIT)
        execute_process(
            COMMAND git rev-parse --short HEAD
            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
            OUTPUT_VARIABLE VERSION_HASH
            OUTPUT_STRIP_TRAILING_WHITESPACE
            RESULT_VARIABLE RESULT)
        if(${RESULT} EQUAL 0)
            # Check for dirty workspace.
            execute_process(COMMAND git diff --quiet WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
                            RESULT_VARIABLE RESULT)
            if(${RESULT} EQUAL 1)
                set(VERSION_HASH "${VERSION_HASH}-dirty")
            endif()
        else()
            set(VERSION_HASH "unknown")
        endif()
    else()
        set(VERSION_HASH "unknown")
    endif()

    set(PKG_VERSION_STR "${PKG_VERSION_STR}-${VERSION_HASH}")

    # Publish the results to the caller.
    set(PKG_VERSION_STR ${PKG_VERSION_STR} PARENT_SCOPE)
    set(PKG_VERSION_HASH ${VERSION_HASH} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_MAJOR ${VERSION_MAJOR} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_MINOR ${VERSION_MINOR} PARENT_SCOPE)
    set(CPACK_PACKAGE_VERSION_PATCH ${VERSION_PATCH} PARENT_SCOPE)
endfunction()
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
#!/bin/bash
#
# Prints the difference in commit count between the two most recent tags
# (by committer date) whose names start with the given prefix.
#
# Usage: version_util.sh [-c] <tag-prefix>

# Print an error message to stderr and abort with a non-zero status.
die() {
    echo "$*" >&2
    exit 1
}

# Handle commandline args
TARGET=""
while [ "$1" != "" ]; do
    case $1 in
        -c ) # Commits since previous tag
            TARGET="count" ;;
        * )
            # First non-option argument is the tag prefix; leave it in $1.
            TARGET="count"
            break ;;
    esac
    shift 1
done
TAG_PREFIX=$1
reg_ex="${TAG_PREFIX}*"

commits_since_last_tag() {
    # The pattern is quoted so the shell hands it to git instead of expanding it
    # against files in the working directory.
    TAG_ARR=($(git tag --sort=committerdate -l "${reg_ex}" | tail -2))
    PREVIOUS_TAG=${TAG_ARR[0]}
    CURRENT_TAG=${TAG_ARR[1]}

    NUM_COMMITS=0
    # With fewer than two matching tags there is nothing to compare; report 0.
    if [[ -n "${PREVIOUS_TAG}" && -n "${CURRENT_TAG}" ]]; then
        PREV_CMT_NUM=$(git rev-list --count "${PREVIOUS_TAG}")
        CURR_CMT_NUM=$(git rev-list --count "${CURRENT_TAG}")

        # Commits since previous tag:
        if [[ -n "${PREV_CMT_NUM}" && -n "${CURR_CMT_NUM}" ]]; then
            NUM_COMMITS=$((CURR_CMT_NUM - PREV_CMT_NUM))
        fi
    fi
    echo "${NUM_COMMITS}"
}

case $TARGET in
    count) commits_since_last_tag ;;
    *) die "Invalid target $TARGET" ;;
esac

exit 0
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
!.sphinx/
!.doxygen/
/_build/
/_doxygen/
/_images/
/_static/
/_templates/
/html/
/latex/
404.md
data/AMD-404.png

# file below is overwritten by sphinx script!
# (a leading "/" anchors the pattern to this directory; "./" never matches)
/esmi_lib_readme_link.md
|
||||
@@ -0,0 +1,296 @@
|
||||
#
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
import re
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst import Directive, directives
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.util.typing import ExtensionMetadata
|
||||
|
||||
|
||||
class GoApiRefDirective(Directive):
    """
    Sphinx/docutils directive that renders API reference content from a Go source file.

    Usage:
        .. go-api-ref:: path/to/gofile.go
           :section: gpu

    The single required argument is the Go file path, resolved relative to the directory
    of the document that contains the directive. The optional ``section`` value is passed
    through to ``generate_rst_content`` as its filter argument.
    """

    required_arguments = 1  # the path to the Go file
    optional_arguments = 0
    has_content = False
    option_spec = {
        "section": directives.unchanged,  # optional section filter
    }

    def run(self):
        """Generate the reference content and feed it back into the parser.

        Returns:
            list: a single warning node when the Go file does not exist; otherwise a single
            empty container node carrying the ``go-api-reference`` class. The generated
            content itself is inserted into the state machine input rather than being
            attached to the returned container.
        """
        build_env = self.state.document.settings.env
        requested_section = self.options.get("section", None)

        # Resolve the argument against the directory of the current document.
        document_dir = Path(build_env.doc2path(build_env.docname)).parent
        go_source = (document_dir / self.arguments[0]).resolve()

        if not go_source.exists():
            return [nodes.warning("", nodes.paragraph("", f"Go source file not found: {go_source}"))]

        # Parse the Go file, then render the parsed functions for the requested section.
        rst_content = generate_rst_content(parse_go_file(str(go_source)), requested_section)

        wrapper = nodes.container()
        wrapper["classes"].append("go-api-reference")

        self.state_machine.insert_input(rst_content, source=str(go_source))

        return [wrapper]
|
||||
|
||||
|
||||
def parse_go_file(file_path):
    """Parse a Go file and extract function documentation.

    Only top-level functions (no method receivers) that are immediately
    preceded by a block of ``//`` line comments are extracted. Functions
    with no return value and with return types such as ``[]string``,
    ``*T`` or ``pkg.Type`` are recognised as well.

    Args:
        file_path: Path to the Go source file.

    Returns:
        A list of dicts, one per function in file order, with the keys
        ``name``, ``params``, ``return_type`` (empty string when the
        function returns nothing), ``description``, ``input_params``,
        ``output_params``, ``example`` and ``section``. ``section`` is the
        lower-cased second ``_``-separated part of the function name
        (``GO_gpu_init`` -> ``gpu``) or ``"other"`` when the name contains
        no underscore.
    """
    # Go source files are UTF-8; do not depend on the build machine's locale.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Group 1: the contiguous ``//`` comment block directly above the function.
    # Group 2: the function name.
    # Group 3: the raw parameter list.
    # Group 4: the optional return type, either a parenthesised list or a
    #          single-line type expression. Parentheses are excluded from
    #          the bare form so a ``)`` inside the parameter list (for
    #          example a ``func(int) int`` parameter) is never mistaken for
    #          the end of the parameters.
    pattern = (
        r"(\/\/[^\n]*(?:\n\/\/[^\n]*)*)\n"
        r"\s*func\s+([A-Za-z0-9_]+)\s*"
        r"\((.*?)\)\s*"
        r"(\(.*?\)|[^\s{()][^{\n()]*?)?\s*\{"
    )
    matches = re.findall(pattern, content, re.DOTALL)

    # Doc-comment lines starting with one of these prefixes open a new section.
    section_markers = (
        ("Input parameter", "input"),
        ("Output:", "output"),
        ("Example:", "example"),
    )

    functions = []
    for doc_comment, func_name, raw_params, raw_return in matches:
        params = raw_params.strip()
        return_type = raw_return.strip()

        # Strip the ``//`` marker and at most one following space.
        doc_lines = []
        for line in doc_comment.split("\n"):
            stripped = line.strip()
            if stripped.startswith("//"):
                comment_text = stripped[2:]
                if comment_text.startswith(" "):
                    comment_text = comment_text[1:]
                doc_lines.append(comment_text)

        # Distribute the comment lines over the four sections. A marker line
        # switches the current section and is itself kept in that section.
        collected = {"description": [], "input": [], "output": [], "example": []}
        current_section = "description"
        for line in doc_lines:
            for prefix, section_name in section_markers:
                if line.startswith(prefix):
                    current_section = section_name
                    break
            collected[current_section].append(line)

        # Description and output are flattened to one line each; input and
        # example keep their line structure.
        desc_text = " ".join(
            [line.strip() for line in collected["description"] if line.strip()]
        )
        output_text = " ".join(
            [line.strip() for line in collected["output"] if line.strip()]
        )

        # Determine the section based on the function name.
        parts = func_name.split("_")
        section = parts[1] if len(parts) > 1 else "other"

        functions.append(
            {
                "name": func_name,
                "params": params,
                "return_type": return_type,
                "description": desc_text,
                "input_params": "\n".join(collected["input"]).strip(),
                "output_params": output_text,
                "example": "\n".join(collected["example"]).strip(),
                "section": section.lower(),  # Stored for filtering
            }
        )

    return functions
|
||||
|
||||
|
||||
def generate_rst_content(functions, section_filter=None):
    """Build the reStructuredText lines for a list of parsed functions.

    With a ``section_filter`` only the functions whose ``section`` equals the
    lower-cased filter are documented, without group headings; if none match,
    a single "No functions found" line is returned. Without a filter the
    functions are grouped by section under ``<SECTION> Functions`` headings,
    ordered GPU first, then CPU, then the remaining sections alphabetically.

    Returns the list of RST lines.
    """
    rst = []

    if section_filter:
        section_filter = section_filter.lower()
        selected = [fn for fn in functions if fn["section"] == section_filter]

        if not selected:
            rst.append(f"No functions found in section: {section_filter}")
            return rst

        # Filtered output: just the functions, no section headers.
        for fn in selected:
            add_function_documentation(rst, fn)
        return rst

    # Bucket the functions by their section, preserving file order per bucket.
    grouped = {}
    for fn in functions:
        grouped.setdefault(fn["section"], []).append(fn)

    # GPU and CPU lead (when present); everything else follows alphabetically.
    leading = ("gpu", "cpu")
    ordered = [name for name in leading if name in grouped]
    ordered.extend(name for name in sorted(grouped.keys()) if name not in leading)

    for name in ordered:
        heading = f"{name.upper()} Functions"
        rst.append(heading)
        rst.append("-" * len(heading))
        rst.append("")

        for fn in grouped[name]:
            add_function_documentation(rst, fn)

    return rst
|
||||
|
||||
|
||||
def add_function_documentation(lines, func):
    """Append the RST for one parsed function to ``lines`` (mutated in place).

    Emits, in order: a title with a ``~`` underline, the Go signature in a
    ``code-block``, then the description, input parameters, output and
    example sections for whichever of those are non-empty in ``func``.
    """
    name = func["name"]

    # Title. The underline is sized for the name wrapped in double backticks,
    # so it is four characters longer than the title text itself.
    lines.append(name)
    lines.append("~" * len(f"``{name}``"))
    lines.append("")

    # Signature: a parenthesised return list is shown without its parentheses.
    shown_return = func["return_type"]
    if shown_return.startswith("(") and shown_return.endswith(")"):
        shown_return = shown_return[1:-1]

    lines.extend(
        [
            ".. code-block:: go",
            "",
            f" func {name}({func['params']}) {shown_return}",
            "",
        ]
    )

    if func["description"]:
        lines.extend([func["description"], ""])

    if func["input_params"]:
        lines.extend(func["input_params"].split("\n"))
        lines.append("")

    if func["output_params"]:
        lines.extend([func["output_params"], ""])

    if func["example"]:
        # Lines are emitted as plain text until the first line (other than the
        # very first) that starts with one of these prefixes; from there on
        # everything is placed inside a Go code block.
        code_openers = ("import", "if", "for")
        code_started = False

        for position, raw_line in enumerate(func["example"].split("\n")):
            text = raw_line.strip()

            if text == "Example:":
                lines.append("Example:")
                continue

            if not code_started and position > 0 and text.startswith(code_openers):
                code_started = True
                lines.extend(["", ".. code-block:: go", ""])

            if code_started:
                # Code-block content must be indented.
                lines.append(f" {raw_line}")
            elif text:
                # Outside code blocks, blank lines are dropped.
                lines.append(raw_line)

        lines.append("")
|
||||
|
||||
|
||||
def setup(app):
    """
    Sphinx extension entry point.

    Registers the ``go-api-ref`` directive and returns the extension
    metadata (version and parallel read/write safety flags).
    """
    app.add_directive("go-api-ref", GoApiRefDirective)

    metadata = dict(
        version="0.1.0",
        parallel_read_safe=True,
        parallel_write_safe=True,
    )
    return metadata
|
||||
@@ -0,0 +1,95 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "AMD SMI for reliability, availability, serviceability."
|
||||
"keywords": "system, management, interface, cper, log, error, spec, ecc, afid, fault, ras"
|
||||
---
|
||||
|
||||
# Reliability, availability, serviceability (RAS)
|
||||
|
||||
RAS aims to increase the robustness of a system by detecting hardware errors, recording them, and
|
||||
correcting them where possible. See [Reliability, availability, serviceability (Linux
|
||||
kernel)](https://docs.kernel.org/admin-guide/RAS/main.html) for more general information.
|
||||
|
||||
## ECC
|
||||
|
||||
ECC (Error-Correcting Code) memory automatically detects memory errors. Correctable 1-bit
|
||||
errors are handled by the ECC logic and logged by the hardware. Uncorrectable 2-bit errors can be
|
||||
detected but not reliably fixed; this is a more serious event that must be reported. See [RAS Error
|
||||
Count sysfs Interface](https://docs.kernel.org/gpu/amdgpu/ras.html#ras-error-count-sysfs-interface)
|
||||
to learn how AMD SMI accesses error counts.
|
||||
|
||||
While ECC is a mechanism to handle different errors, CPER is the standard used to report that the event
|
||||
occurred.
|
||||
|
||||
## CPER
|
||||
|
||||
At its core, CPER (Common Platform Error Record) is a standard format included in the [UEFI
|
||||
specification](https://uefi.org/specs/UEFI/2.10/01_Introduction.html) to report errors to the
|
||||
operating system. It works as a standard error report template that different hardware components
|
||||
can fill out when something goes wrong. It consists of a header, one or more section descriptors --
|
||||
and for each descriptor, an associated section containing error or informational data. See [CPER
|
||||
(UEFI Specification)](https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html) for
|
||||
more information.
|
||||
|
||||
A CPER record consists of vital information for diagnostics such as:
|
||||
|
||||
- Error source
|
||||
- Error type
|
||||
- Error severity
|
||||
- 0 - Recoverable (also called non-fatal uncorrected)
|
||||
- 1 - Fatal
|
||||
- 2 - Corrected
|
||||
- 3 - Informational
|
||||
- Timestamp
|
||||
- Other data
|
||||
|
||||
A CPER record might contain an AFID in its data to help map a complex error to a more actionable service task.
|
||||
|
||||
## AFID
|
||||
|
||||
AFIDs (AMD Field IDs) are unique numerical IDs associated with specific events or errors produced by
|
||||
AMD Instinct accelerators. Each AFID provides a specific identifier for a known condition, which helps
|
||||
facilitate root cause analysis. Each AFID is associated with category, type, and severity fields. See
|
||||
[AFID Event List](https://docs.amd.com/r/en-US/AMD_Field_ID_70122_v1.0/AFID-Event-List) for more
|
||||
information.
|
||||
|
||||
## From concept to action
|
||||
|
||||
AMD SMI provides tools to programmatically monitor and manage these RAS features.
|
||||
|
||||
:::::{tab-set}
|
||||
::::{tab-item} C/C++
|
||||
The AMD SMI library provides APIs to query ECC error counts and manage CPER records
|
||||
(list, decode, and clear).
|
||||
|
||||
See [ECC information](/doxygen/docBin/html/group__tagECCInfo) and [RAS
|
||||
information](/doxygen/docBin/html/group__tagRasInfo) for available APIs.
|
||||
::::
|
||||
|
||||
::::{tab-item} Python
|
||||
See related APIs:
|
||||
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_count)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_enabled)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ecc_status)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_total_ecc_count)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_cper_entries)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_afids_from_cper)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ras_feature_info)
|
||||
- [](/reference/amdsmi-py-api.md#amdsmi_get_gpu_ras_block_features_enabled)
|
||||
::::
|
||||
|
||||
::::{tab-item} amd-smi CLI
|
||||
See [`amd-smi ras --help`](/how-to/amdsmi-cli-tool.md#amd-smi-ras) for details and available options.
|
||||
```shell
|
||||
amd-smi ras --help
|
||||
```
|
||||
::::
|
||||
:::::
|
||||
|
||||
## Further reading
|
||||
|
||||
- [AMD Field ID](https://docs.amd.com/r/en-US/AMD_Field_ID_70122_v1.0/Introduction)
|
||||
- [CPER (UEFI specification)](https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html)
|
||||
- [Reliability, availability, serviceability (Linux kernel)](https://docs.kernel.org/admin-guide/RAS/main.html)
|
||||
@@ -0,0 +1,85 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# This file only contains a selection of the most common options. For a full
|
||||
# list see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.append(str(Path("_extension").resolve()))
|
||||
|
||||
|
||||
# get version number to print in docs
|
||||
def get_version_info(filepath):
    """Read the AMD SMI library version from a C header.

    Looks for the ``AMDSMI_LIB_VERSION_MAJOR``, ``AMDSMI_LIB_VERSION_MINOR``
    and ``AMDSMI_LIB_VERSION_RELEASE`` ``#define`` lines. Each define is
    looked up by name, so the result does not depend on the order in which
    they appear in the header.

    Args:
        filepath: Path to the header file.

    Returns:
        A ``(major, minor, release)`` tuple of digit strings.

    Raises:
        ValueError: If any of the three defines is missing.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    components = []
    for field in ("MAJOR", "MINOR", "RELEASE"):
        found = re.search(
            rf"^#define\s+AMDSMI_LIB_VERSION_{field}\s+(\d+)\s*$",
            content,
            re.MULTILINE,
        )
        if found is None:
            raise ValueError("Couldn't find all VERSION numbers.")
        components.append(found.group(1))

    return tuple(components)
|
||||
|
||||
|
||||
version_major, version_minor, version_release = get_version_info(
|
||||
"../include/amd_smi/amdsmi.h"
|
||||
)
|
||||
version_number = f"{version_major}.{version_minor}.{version_release}"
|
||||
|
||||
# project info
|
||||
project = "AMD SMI"
|
||||
author = "Advanced Micro Devices, Inc."
|
||||
copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved."
|
||||
version = version_number
|
||||
release = version_number
|
||||
|
||||
html_theme = "rocm_docs_theme"
|
||||
html_theme_options = {"flavor": "rocm"}
|
||||
html_title = f"AMD SMI {version_number} documentation"
|
||||
suppress_warnings = ["etoc.toctree"]
|
||||
external_toc_path = "./sphinx/_toc.yml"
|
||||
|
||||
external_projects_current_project = "amdsmi"
|
||||
extensions = ["rocm_docs", "rocm_docs.doxygen", "go_api_ref"]
|
||||
|
||||
doxygen_root = "doxygen"
|
||||
doxysphinx_enabled = True
|
||||
doxygen_project = {
|
||||
"name": "AMD SMI C++ API reference",
|
||||
"path": "doxygen/docBin/xml",
|
||||
}
|
||||
|
||||
|
||||
def generate_doxyfile(app, _):
    """Render ``Doxyfile`` from ``Doxyfile.in`` inside the doxygen directory.

    Connected to Sphinx's ``config-inited`` event. Replaces every
    ``@PROJECT_NUMBER@`` placeholder in the template with the module-level
    ``version_number`` and writes the result next to the template.

    Raises:
        sphinx.errors.ConfigError: If ``Doxyfile.in`` does not exist.
    """
    doxygen_dir = Path(app.confdir) / doxygen_root
    template_path = doxygen_dir / "Doxyfile.in"
    output_path = doxygen_dir / "Doxyfile"

    if not template_path.exists():
        # Imported lazily; only needed on the failure path.
        from sphinx.errors import ConfigError

        raise ConfigError(f"Missing Doxyfile.in at {template_path}")

    with open(template_path) as template_file:
        rendered = template_file.read().replace("@PROJECT_NUMBER@", version_number)

    with open(output_path, "w") as output_file:
        output_file.write(rendered)
|
||||
|
||||
|
||||
def setup(app):
    """Hook Doxyfile generation into Sphinx start-up.

    ``generate_doxyfile`` is connected to the ``config-inited`` event with
    priority 100; the returned metadata marks the configuration as safe for
    parallel reading and writing.
    """
    app.connect("config-inited", generate_doxyfile, priority=100)
    return dict(parallel_read_safe=True, parallel_write_safe=True)
|
||||
TEMPAT SAMPAH
Binary file not shown.
|
After Width: | Height: | Ukuran: 62 KiB |
@@ -0,0 +1,2 @@
|
||||
docBin/
|
||||
Doxyfile
|
||||
File diff ditekan karena terlalu besar
Load Diff
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,233 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Get started with the AMD SMI C++ library. Basic usage and examples."
|
||||
"keywords": "api, smi, lib, c++, system, management, interface, ROCm"
|
||||
---
|
||||
|
||||
# AMD SMI C++ library usage and examples
|
||||
|
||||
This section presents a brief overview and some basic examples on the AMD SMI
|
||||
library's usage. Whether you are developing applications for performance
|
||||
monitoring, system diagnostics, or resource allocation, the AMD SMI C++ library
|
||||
serves as a valuable tool for leveraging the full potential of AMD hardware in
|
||||
your projects.
|
||||
|
||||
```{note}
|
||||
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
|
||||
dynamic library. To compile code that uses the AMD SMI library API, ensure the
|
||||
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
|
||||
variable to the directory containing ``libamd_smi.so`` (usually
|
||||
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
|
||||
```
|
||||
|
||||
```{note}
|
||||
The environment variable ``AMDSMI_GPU_METRICS_CACHE_MS`` may be set to
|
||||
control the internal GPU metrics cache duration (ms).
|
||||
Default 1, set to 0 to disable.
|
||||
```
|
||||
|
||||
```{note}
|
||||
The environment variable ``AMDSMI_ASIC_INFO_CACHE_MS`` may be set to
|
||||
control the internal GPU asic info cache duration (ms).
|
||||
Default 10000 ms, set to 0 to disable.
|
||||
```
|
||||
|
||||
```{seealso}
|
||||
Refer to the [C++ library API reference](../reference/amdsmi-cpp-api.md).
|
||||
```
|
||||
|
||||
(device_socket_handle)=
|
||||
## Device and socket handles
|
||||
|
||||
Many functions in the library take a _socket handle_ or _device handle_. A
|
||||
_socket_ refers to a physical hardware socket, abstracted by the library to
|
||||
represent the hardware more effectively to the user. While there is always one
|
||||
unique GPU per socket, an APU may house both a GPU and CPU on the same socket.
|
||||
For MI200 GPUs, multiple GCDs may reside within a single socket.
|
||||
|
||||
To identify the sockets in a system, use the `amdsmi_get_socket_handles()`
|
||||
function, which returns a list of socket handles. These handles can then be used
|
||||
with `amdsmi_get_processor_handles()` to query devices within each socket. The
|
||||
device handle is used to differentiate between detected devices; however, it's
|
||||
important to note that a device handle may change after restarting the
|
||||
application, so it should not be considered a persistent identifier across
|
||||
processes.
|
||||
|
||||
The list of socket handles obtained from `amdsmi_get_socket_handles()` can
|
||||
also be used to query the CPUs in each socket by calling
|
||||
`amdsmi_get_processor_handles_by_type()`. This function can then be called again
|
||||
to query the cores within each CPU.
|
||||
|
||||
(cpp_hello_amdsmi)=
|
||||
## Hello AMD SMI
|
||||
|
||||
An application using AMD SMI must call `amdsmi_init()` to initialize the AMD SMI
|
||||
library before all other calls. This call initializes the internal data
|
||||
structures required for subsequent AMD SMI operations. In the call, a flag can
|
||||
be passed to indicate if the application is interested in a specific device
|
||||
type.
|
||||
|
||||
`amdsmi_shut_down()` must be the last call to properly close the connection to the
|
||||
driver and make sure that any resources held by AMD SMI are released.
|
||||
|
||||
1. A simple "Hello World" type program that displays the temperature of detected
|
||||
devices.
|
||||
|
||||
```{note}
|
||||
Sample build example:
|
||||
$ g++ -I/opt/rocm/include <file_name>.cc -L/opt/rocm/lib -lamd_smi -o <filename>
|
||||
|
||||
The /opt/rocm-*/lib path may differ depending on your installation; locate the path of your libamd_smi.so.*.
|
||||
For example:
|
||||
|
||||
$ sudo find /opt/ -iname libamd_smi.so*
|
||||
/opt/rocm-6.4.1/lib/libamd_smi.so.25.0
|
||||
/opt/rocm-6.4.1/lib/libamd_smi.so
|
||||
```
|
||||
|
||||
The code is as follows:
|
||||
|
||||
```cpp
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
|
||||
// Init amdsmi for sockets and devices. Here we are only interested in AMD_GPUS.
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
|
||||
// Get the socket count available in the system.
|
||||
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
|
||||
|
||||
// Allocate the memory for the sockets
|
||||
std::vector<amdsmi_socket_handle> sockets(socket_count);
|
||||
// Get the socket handles in the system
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
|
||||
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get identifier and devices
|
||||
for (uint32_t i=0; i < socket_count; i++) {
|
||||
// Get Socket info
|
||||
char socket_info[128];
|
||||
ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
|
||||
std::cout << "Socket " << socket_info<< std::endl;
|
||||
|
||||
// Get the device count for the socket.
|
||||
uint32_t device_count = 0;
|
||||
ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
|
||||
|
||||
// Allocate the memory for the device handlers on the socket
|
||||
std::vector<amdsmi_processor_handle> processor_handles(device_count);
|
||||
// Get all devices of the socket
|
||||
ret = amdsmi_get_processor_handles(sockets[i],
|
||||
&device_count, &processor_handles[0]);
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j=0; j < device_count; j++) {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMDSMI_INIT_AMD_GPUS, the processor_type must be AMDSMI_PROCESSOR_TYPE_AMD_GPU.
|
||||
processor_type_t processor_type;
|
||||
ret = amdsmi_get_processor_type(processor_handles[j], &processor_type);
|
||||
if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
|
||||
std::cout << "Expect AMDSMI_PROCESSOR_TYPE_AMD_GPU device type!\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get device name
|
||||
amdsmi_board_info_t board_info;
|
||||
ret = amdsmi_get_gpu_board_info(processor_handles[j], &board_info);
|
||||
std::cout << "\tdevice "
|
||||
<< j <<"\n\t\tName:" << board_info.product_name << std::endl;
|
||||
|
||||
// Get temperature
|
||||
int64_t val_i64 = 0;
|
||||
ret = amdsmi_get_temp_metric(processor_handles[j], AMDSMI_TEMPERATURE_TYPE_EDGE,
|
||||
AMDSMI_TEMP_CURRENT, &val_i64);
|
||||
std::cout << "\t\tTemperature: " << val_i64 << "C" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
|
||||
// and devices pointers
|
||||
ret = amdsmi_shut_down();
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
2. A sample program that displays the power of detected CPUs.
|
||||
|
||||
```{note}
|
||||
Sample build example:
|
||||
$ g++ -DENABLE_ESMI -I/opt/rocm/include <file_name>.cc -L/opt/rocm/lib -lamd_smi -o <filename>
|
||||
|
||||
To find the available ROCm include and library paths, see the build example for sample program 1 above.
|
||||
```
|
||||
|
||||
The code is as follows:
|
||||
|
||||
```cpp
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
amdsmi_status_t ret;
|
||||
uint32_t socket_count = 0;
|
||||
|
||||
// Initialize amdsmi for AMD CPUs
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_CPUS);
|
||||
|
||||
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
|
||||
|
||||
// Allocate the memory for the sockets
|
||||
std::vector<amdsmi_socket_handle> sockets(socket_count);
|
||||
|
||||
// Get the sockets of the system
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
|
||||
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get cpus
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
uint32_t cpu_count = 0;
|
||||
|
||||
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU
|
||||
processor_type_t processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &cpu_count);
|
||||
|
||||
// Allocate the memory for the cpus
|
||||
std::vector<amdsmi_processor_handle> plist(cpu_count);
|
||||
|
||||
// Get the cpus for each socket
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &plist[0], &cpu_count);
|
||||
|
||||
for (uint32_t index = 0; index < plist.size(); index++) {
|
||||
uint32_t socket_power;
|
||||
std::cout<<"CPU "<<index<<"\t"<< std::endl;
|
||||
std::cout<<"Power (Watts): ";
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power(plist[index], &socket_power);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
std::cout<<"Failed to get cpu socket power"<<"["<<index<<"] , Err["<<ret<<"] "<< std::endl;
|
||||
|
||||
if (!ret) {
|
||||
std::cout<<static_cast<double>(socket_power)/1000<<std::endl;
|
||||
}
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init
|
||||
ret = amdsmi_shut_down();
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,87 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Get started with the AMD SMI Go interface."
|
||||
"keywords": "api, smi, lib, go, golang, system, management, interface, ROCm"
|
||||
---
|
||||
|
||||
# AMD SMI Go interface overview
|
||||
|
||||
The AMD SMI Go interface provides a convenient way to interact with AMD
|
||||
hardware through a simple and accessible [API](../reference/amdsmi-go-api.md).
|
||||
The API is compatible with Go 1.20 and higher and requires the AMD driver to
|
||||
be loaded for initialization. Review the [prerequisites](#install_reqs).
|
||||
|
||||
```{seealso}
|
||||
Refer to the [Go library API reference](../reference/amdsmi-go-api.md).
|
||||
```
|
||||
|
||||
(go_prereqs)=
|
||||
## Prerequisites
|
||||
|
||||
Before getting started, make sure your environment satisfies the following prerequisites.
|
||||
See the [requirements](#install_reqs) section for more information.
|
||||
|
||||
1. Ensure `amdgpu` drivers are installed properly for initialization.
|
||||
|
||||
2. Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64:
|
||||
```
|
||||
|
||||
3. Install Go 1.20+.
|
||||
|
||||
Download Go from [https://go.dev/dl/](https://go.dev/dl/) and follow the
|
||||
official installation documentation at [Download and
|
||||
install](https://go.dev/doc/install).
|
||||
|
||||
Alternatively, use a third-party utility like update-golang.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/udhos/update-golang
|
||||
cd update-golang
|
||||
sudo ./update-golang.sh
|
||||
source /etc/profile.d/golang_path.sh
|
||||
go version
|
||||
```
|
||||
|
||||
## Get started
|
||||
|
||||
```{note}
|
||||
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
|
||||
dynamic library. To compile code that uses the AMD SMI library API, ensure the
|
||||
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
|
||||
variable to the directory containing ``libamd_smi.so`` (usually
|
||||
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
|
||||
```
|
||||
|
||||
A Go application using AMD SMI must call `goamdsmi.GO_gpu_init()` to initialize
|
||||
the AMD SMI library before all other calls. This call initializes the internal
|
||||
data structures required for subsequent AMD SMI operations.
|
||||
|
||||
`goamdsmi.GO_gpu_shutdown()` must be the last call to properly close the connection to the
|
||||
driver and make sure that any resources held by AMD SMI are released.
|
||||
|
||||
## Usage
|
||||
|
||||
For an example on using the AMD SMI Go API, refer to this implementation
|
||||
[https://github.com/amd/amd_smi_exporter/tree/master](https://github.com/amd/amd_smi_exporter/tree/master).
|
||||
|
||||
```{seealso}
|
||||
Refer to the [Go library API reference](../reference/amdsmi-go-api.md).
|
||||
```
|
||||
|
||||
### Add AMD SMI library to your project
|
||||
|
||||
To include the AMD SMI Go API in your project, update your Makefile or Go module configuration
|
||||
to fetch the appropriate version of the AMD SMI library.
|
||||
|
||||
```shell
|
||||
go get github.com/ROCm/amdsmi@amd-staging
|
||||
```
|
||||
|
||||
When using a Makefile, ensure you're fetching the latest AMD SMI repository
|
||||
with Go API support. See
|
||||
[https://github.com/amd/amd_smi_exporter/blob/master/src/Makefile](https://github.com/amd/amd_smi_exporter/blob/master/src/Makefile)
|
||||
for an example implementation.
|
||||
@@ -0,0 +1,150 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Get started with the AMD SMI Python interface."
|
||||
"keywords": "api, smi, lib, py, system, management, interface, ROCm"
|
||||
---
|
||||
|
||||
# AMD SMI Python interface overview
|
||||
|
||||
The AMD SMI Python interface provides a convenient way to interact with AMD
|
||||
hardware through a simple and accessible [API](../reference/amdsmi-py-api.md).
|
||||
|
||||
```{seealso}
|
||||
Refer to the [Python library API reference](../reference/amdsmi-py-api.md).
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before getting started, make sure your environment satisfies the following prerequisites.
|
||||
See the [requirements](#install_reqs) section for more information.
|
||||
|
||||
1. Ensure `amdgpu` drivers are installed properly for initialization.
|
||||
|
||||
2. Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64:
|
||||
```
|
||||
|
||||
3. Install Python 3.6.8+.
|
||||
|
||||
## Get started
|
||||
|
||||
```{note}
|
||||
``hipcc`` and other compilers will not automatically link in the ``libamd_smi``
|
||||
dynamic library. To compile code that uses the AMD SMI library API, ensure the
|
||||
``libamd_smi.so`` can be located by setting the ``LD_LIBRARY_PATH`` environment
|
||||
variable to the directory containing ``libamd_smi.so`` (usually
|
||||
``/opt/rocm/lib``) or by passing the ``-lamd_smi`` flag to the compiler.
|
||||
```
|
||||
|
||||
```{note}
|
||||
The environment variable ``AMDSMI_GPU_METRICS_CACHE_MS`` may be set to
|
||||
control the internal GPU metrics cache duration (ms).
|
||||
Default 1, set to 0 to disable.
|
||||
|
||||
The environment variable ``AMDSMI_ASIC_INFO_CACHE_MS`` may be set to
|
||||
control the internal GPU asic info cache duration (ms).
|
||||
Default 10000 ms, set to 0 to disable.
|
||||
|
||||
You can apply them in one of two ways:
|
||||
|
||||
1. In Python code (before the AMDSMI library loads):
|
||||
```
|
||||
|
||||
```python
|
||||
import os
|
||||
os.environ["AMDSMI_GPU_METRICS_CACHE_MS"] = "200"
|
||||
from amdsmi import *
|
||||
```
|
||||
|
||||
```{note}
|
||||
2. On the shell when invoking Python:
|
||||
```
|
||||
|
||||
```shell
|
||||
AMDSMI_GPU_METRICS_CACHE_MS=200 python tools/amdsmi_quick_start.py
|
||||
```
|
||||
|
||||
To get started, the `amdsmi` folder should be copied and placed next to
|
||||
the importing script. Import it as follows:
|
||||
|
||||
```python
|
||||
from amdsmi import *
|
||||
|
||||
try:
|
||||
amdsmi_init()
|
||||
|
||||
# amdsmi calls ...
|
||||
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
finally:
|
||||
try:
|
||||
amdsmi_shut_down()
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
(py_lib_fs)=
|
||||
### Folder structure
|
||||
|
||||
File name | Description
|
||||
----------------------|-------------------------------------------------
|
||||
`__init__.py` | Python package initialization file
|
||||
`amdsmi_interface.py` | Amdsmi library Python interface
|
||||
`amdsmi_wrapper.py` | Python wrapper around amdsmi binary
|
||||
`amdsmi_exception.py` | Amdsmi [exceptions](#py_exceptions) Python file
|
||||
|
||||
(py_usage)=
|
||||
## Usage
|
||||
|
||||
An application using AMD SMI must call `amdsmi_init()` to initialize the AMD SMI
|
||||
library before all other calls. This call initializes the internal data
|
||||
structures required for subsequent AMD SMI operations. In the call, a flag can
|
||||
be passed to indicate if the application is interested in a specific device
|
||||
type.
|
||||
|
||||
`amdsmi_shut_down()` must be the last call to properly close the connection to the
|
||||
driver and make sure that any resources held by AMD SMI are released.
|
||||
|
||||
```{seealso}
|
||||
Refer to the [Python library API reference](../reference/amdsmi-py-api.md).
|
||||
```
|
||||
|
||||
(py_exceptions)=
|
||||
## Exceptions
|
||||
|
||||
All exceptions are defined in the `amdsmi_exception.py` file.
|
||||
|
||||
Exceptions that can be thrown by AMD SMI are:
|
||||
|
||||
* `AmdSmiException`: base amdsmi exception class
|
||||
* `AmdSmiLibraryException`: derives base `AmdSmiException` class and represents errors that can occur in amdsmi-lib.
|
||||
When this exception is thrown, `err_code` and `err_info` are set. `err_code` is an integer that corresponds to errors that can occur
|
||||
in amdsmi-lib and `err_info` is a string that explains the error that occurred.
|
||||
|
||||
For example:
|
||||
|
||||
```python
|
||||
try:
|
||||
num_of_GPUs = len(amdsmi_get_processor_handles())
|
||||
if num_of_GPUs == 0:
|
||||
print("No GPUs on machine")
|
||||
except AmdSmiException as e:
|
||||
print("Error code: {}".format(e.err_code))
|
||||
if e.err_code == amdsmi_wrapper.AMDSMI_STATUS_RETRY:
|
||||
print("Error info: {}".format(e.err_info))
|
||||
```
|
||||
|
||||
* `AmdSmiRetryException` : Derives `AmdSmiLibraryException` class and signals
|
||||
device is busy and call should be retried.
|
||||
* `AmdSmiTimeoutException` : Derives `AmdSmiLibraryException` class and
|
||||
represents that call had timed out.
|
||||
* `AmdSmiParameterException`: Derives base `AmdSmiException` class and
|
||||
represents errors related to invalid parameters passed to functions. When this
|
||||
exception is thrown, `err_msg` is set and it explains what is the actual and
|
||||
expected type of the parameters.
|
||||
* `AmdSmiBdfFormatException`: Derives base `AmdSmiException` class and
|
||||
represents invalid bdf format.
|
||||
@@ -0,0 +1,39 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Docker container configuration and setup procedures for AMD SMI."
|
||||
"keywords": "api, smi, lib, system, management, interface, ROCm, docker, systemd, modprobe"
|
||||
---
|
||||
|
||||
# Using AMD SMI in a Docker container
|
||||
|
||||
To ensure proper functionality of AMD SMI within a Docker container, the
|
||||
following configuration options must be included. These settings are
|
||||
particularly important for managing memory partitions, as partitioning depends
|
||||
on loading and unloading drivers (with `systemd` dependencies):
|
||||
|
||||
* `--cap-add=SYS_MODULE`
|
||||
|
||||
This option adds the `SYS_MODULE` capability to the container, allowing it to
|
||||
load and interact with kernel modules.
|
||||
|
||||
```{note}
|
||||
Granting `SYS_MODULE` increases the container's privileges and reduces
|
||||
isolation from the host. Use this option only with trusted containers and
|
||||
images.
|
||||
```
|
||||
|
||||
* `-v /lib/modules:/lib/modules`
|
||||
|
||||
By mounting the `/lib/modules/` directory into the container, the container
|
||||
gains access to the host's kernel modules, allowing it to load and interact
|
||||
with them. Without this access, operations requiring module loading like
|
||||
memory partitioning would fail.
|
||||
|
||||
For example:
|
||||
|
||||
```{image} ../data/how-to/setup-docker-container/docker-run-example.jpg
|
||||
:alt: Command line example of running a Docker container for AMD SMI
|
||||
:align: center
|
||||
:width: 100%
|
||||
```
|
||||
@@ -0,0 +1,65 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "AMD SMI documentation and API reference."
|
||||
"keywords": "amdsmi, lib, cli, system, management, interface, amdgpu, admin, sys"
|
||||
---
|
||||
|
||||
# AMD SMI documentation
|
||||
|
||||
The AMD System Management Interface (AMD SMI) library offers a unified tool for
|
||||
managing and monitoring GPUs, particularly in high-performance computing
|
||||
environments. It provides a user-space interface that allows applications to
|
||||
control GPU operations, monitor performance, and retrieve information about the
|
||||
system's drivers and GPUs.
|
||||
|
||||
Find the source code at <https://github.com/ROCm/amdsmi>.
|
||||
|
||||
```{note}
|
||||
AMD SMI is the successor to <https://github.com/ROCm/rocm_smi_lib>.
|
||||
```
|
||||
|
||||
::::{grid} 2
|
||||
:gutter: 3
|
||||
|
||||
:::{grid-item-card} Install
|
||||
* [Library and CLI tool installation](./install/install.md)
|
||||
* [Build from source](./install/build.md)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} How to
|
||||
* [C++ library usage](./how-to/amdsmi-cpp-lib.md)
|
||||
* [Python library usage](./how-to/amdsmi-py-lib.md)
|
||||
* [Go library usage](./how-to/amdsmi-go-lib.md)
|
||||
* [CLI tool usage](./how-to/amdsmi-cli-tool.md)
|
||||
* [Use AMD SMI in a Docker container](./how-to/setup-docker-container.md)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} Reference
|
||||
* [C++ API](./reference/amdsmi-cpp-api.md)
|
||||
* [Modules](../doxygen/docBin/html/topics)
|
||||
* [Files](../doxygen/docBin/html/files)
|
||||
* [Globals](../doxygen/docBin/html/globals)
|
||||
* [Data structures](../doxygen/docBin/html/annotated)
|
||||
* [Data fields](../doxygen/docBin/html/functions_data_fields)
|
||||
* [Python API](./reference/amdsmi-py-api.md)
|
||||
* [Go API](./reference/amdsmi-go-api.md)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} Conceptual
|
||||
* [Reliability, availability, serviceability](./conceptual/ras.md)
|
||||
:::
|
||||
|
||||
:::{grid-item-card} Tutorials
|
||||
* [AMD SMI examples (GitHub)](https://github.com/ROCm/amdsmi/tree/amd-staging/example)
|
||||
* [AMD SMI CLI walkthrough](https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html)
|
||||
:::
|
||||
::::
|
||||
|
||||
To learn about contributing to AMD SMI, see [Contributing to AMD
|
||||
SMI](https://github.com/ROCm/amdsmi/blob/amd-mainline/.github/CONTRIBUTING.md).
|
||||
To contribute to the documentation, see
|
||||
{doc}`Contributing to ROCm documentation <rocm:contribute/contributing>`.
|
||||
|
||||
Find ROCm licensing information on the
|
||||
{doc}`Licensing <rocm:about/license>` page.
|
||||
@@ -0,0 +1,109 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "How to build AMD SMI from source."
|
||||
"keywords": "system, management, interface, contribute, contributing, ROCm, develop, testing"
|
||||
---
|
||||
|
||||
# Building AMD SMI
|
||||
|
||||
This section describes the prerequisites and steps to build AMD SMI from source.
|
||||
|
||||
(build_reqs)=
|
||||
## Required software
|
||||
|
||||
To build the AMD SMI library, the following components are required. Note that
|
||||
the software versions specified were used during development; earlier
|
||||
versions are not guaranteed to work.
|
||||
|
||||
* CMake (v3.15.0 or later) -- `python3 -m pip install cmake`
|
||||
* g++ (v5.4.0 or later)
|
||||
* libdrm-dev (for Ubuntu and Debian)
|
||||
* libdrm-devel (for RPM-based distributions)
|
||||
|
||||
In order to build the AMD SMI Python package, the following components are
|
||||
required:
|
||||
|
||||
* Python (3.6.8 or later)
|
||||
* virtualenv -- `python3 -m pip install virtualenv`
|
||||
|
||||
## Build steps
|
||||
|
||||
1. Clone the AMD SMI repository to your local Linux machine.
|
||||
|
||||
```shell
|
||||
git clone https://github.com/ROCm/amdsmi.git
|
||||
```
|
||||
|
||||
2. The default installation location for the library and headers is `/opt/rocm`.
|
||||
Before installation, any old ROCm directories should be deleted:
|
||||
|
||||
* `/opt/rocm`
|
||||
* `/opt/rocm-<version_number>`
|
||||
|
||||
3. Build the library by following the typical CMake build sequence (run as root
|
||||
user or use `sudo` before `make install` command); for instance:
|
||||
|
||||
```bash
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j $(nproc)
|
||||
make install
|
||||
```
|
||||
|
||||
The built library is located in the `build/` directory. To build the `rpm`
|
||||
and `deb` packages use the following command:
|
||||
|
||||
```bash
|
||||
make package
|
||||
```
|
||||
|
||||
(rebuild_py_wrapper)=
|
||||
## Rebuild the Python wrapper
|
||||
|
||||
The Python wrapper for the AMD SMI library is found in the [auto-generated
|
||||
file](#py_lib_fs) `py-interface/amdsmi_wrapper.py`. It is essential to
|
||||
regenerate this wrapper whenever there are changes to the C++ API. It is not
|
||||
regenerated automatically.
|
||||
|
||||
To regenerate the wrapper, use the following command.
|
||||
|
||||
```shell
|
||||
./update_wrapper.sh
|
||||
```
|
||||
|
||||
After this command, the file in `py-interface/amdsmi_wrapper.py` will be updated
|
||||
on compile.
|
||||
|
||||
```{note}
|
||||
You need Docker installed on your system to regenerate the Python wrapper.
|
||||
```
|
||||
|
||||
(build_tests)=
|
||||
## Build the tests
|
||||
|
||||
To verify the build and capabilities of AMD SMI on your system, as well as to
|
||||
see practical examples of its usage, you can build and run the available [tests
|
||||
in the repository](https://github.com/ROCm/amdsmi/tree/amd-staging/tests).
|
||||
Follow these steps to build the tests:
|
||||
|
||||
```bash
|
||||
mkdir -p build
|
||||
cd build
|
||||
cmake -DBUILD_TESTS=ON ..
|
||||
make -j $(nproc)
|
||||
```
|
||||
|
||||
(run_tests)=
|
||||
### Run the tests
|
||||
|
||||
Once the tests are [built](#build_tests), you can run them by executing the
|
||||
`amdsmitst` program. The executable can be found at `build/tests/amd_smi_test/`.
|
||||
|
||||
(build_docs)=
|
||||
## Build the docs
|
||||
|
||||
To build the documentation, follow the instructions at [Building
|
||||
documentation](https://rocm.docs.amd.com/en/latest/contribute/building.html).
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "How to install AMD SMI libraries and CLI tool."
|
||||
"keywords": "system, management, interface, cpu, gpu, hsmp, versions"
|
||||
---
|
||||
|
||||
# Install the AMD SMI library and CLI tool
|
||||
|
||||
This section describes how to install the AMD SMI library, Python interface,
|
||||
and command line tool either as part of the
|
||||
{doc}`ROCm software stack <rocm:what-is-rocm>` -- or manually.
|
||||
|
||||
(install_reqs)=
|
||||
## Requirements
|
||||
|
||||
The following are required to install and use the AMD SMI library through its language interfaces and CLI.
|
||||
|
||||
* The `amdgpu` driver must be loaded for AMD SMI initialization to work. See
|
||||
[Install the amdgpu driver](#install_amdgpu_driver).
|
||||
|
||||
* Export `LD_LIBRARY_PATH` to the `amdsmi` installation directory.
|
||||
|
||||
```bash
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64
|
||||
```
|
||||
|
||||
### Supported platforms
|
||||
|
||||
The AMD SMI library supports Linux bare metal and Linux virtual machine guest
|
||||
for AMD GPUs and AMD EPYC™ CPUs via
|
||||
[esmi_ib_library](https://github.com/amd/esmi_ib_library). To use AMD SMI for virtualization, refer to
|
||||
the [AMD SMI for Virtualization documentation](https://instinct.docs.amd.com/projects/amd-smi-virt/en/latest/index.html).
|
||||
|
||||
AMD SMI library can run on AMD ROCm supported platforms. Refer to
|
||||
{doc}`System requirements (Linux) <rocm-install-on-linux:reference/system-requirements>`
|
||||
for more information.
|
||||
<!--https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html-->
|
||||
|
||||
To run the AMD SMI library, the `amdgpu` driver and the `amd_hsmp` or `hsmp_acpi` driver need to be installed. Optionally, `libdrm` can be installed to query firmware
|
||||
information and hardware IPs.
|
||||
|
||||
### Python interface and CLI tool prerequisites
|
||||
|
||||
* Python version 3.6.8 or greater (64-bit)
|
||||
|
||||
::::{note}
|
||||
During the driver installation process on Azure Linux 3, you might encounter the `ModuleNotFoundError: No module named 'more_itertools'` warning. This warning is a result of the reintroduction of `python3-wheel` and `python3-setuptools` dependencies in the CMake of AMD SMI, which requires `more_itertools` to build these Python libraries. This issue will be fixed in a future ROCm release. As a workaround, use the following command before installation:
|
||||
|
||||
```
|
||||
sudo python3 -m pip install more_itertools
|
||||
```
|
||||
::::
|
||||
|
||||
### Go interface prerequisites
|
||||
|
||||
* Go version 1.20 or greater
|
||||
|
||||
(install_amdgpu_driver)=
|
||||
## Install the amdgpu driver
|
||||
|
||||
```{note}
|
||||
As of ROCm 7.0.0, the `amdgpu` driver is distributed separately from the ROCm
|
||||
software stack. See
|
||||
{doc}`rocm-install-on-linux:reference/user-kernel-space-compat-matrix` for
|
||||
driver to ROCm user space compatibility information.
|
||||
```
|
||||
|
||||
Confirm that your Linux kernel version matches the system requirements described in
|
||||
{ref}`rocm-install-on-linux:supported_distributions`.
|
||||
|
||||
For up-to-date installation instructions, see the [AMD GPU Driver (amdgpu)
|
||||
documentation](https://instinct.docs.amd.com/projects/amdgpu-docs/en/latest/install/detailed-install/prerequisites.html).
|
||||
|
||||
(install_amdgpu_rocm)=
|
||||
## Install AMD SMI with ROCm
|
||||
|
||||
AMD SMI is included as a core package in the ROCm software stack as part of the
|
||||
`rocm-developer-tools` meta package. See [ROCm runtime
|
||||
packages](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/package-manager-integration.html#id3)
|
||||
for more information.
|
||||
|
||||
```{note}
|
||||
The `amdgpu-install` script is no longer the recommended way to install ROCm.
|
||||
Install using your supported Linux distribution's package manager instead.
|
||||
```
|
||||
|
||||
For up-to-date installation instructions via package manager, see {doc}`ROCm
|
||||
installation for Linux <rocm-install-on-linux:install/prerequisites>`.
|
||||
|
||||
After installing the `amdgpu` driver and ROCm, verify your AMD SMI installation:
|
||||
|
||||
```shell
|
||||
amd-smi
|
||||
```
|
||||
|
||||
(install_without_rocm)=
|
||||
## Install AMD SMI without ROCm
|
||||
|
||||
The following are example steps to install the AMD SMI libraries and CLI tool on
|
||||
Ubuntu 22.04.
|
||||
|
||||
1. Install the library.
|
||||
|
||||
```shell
|
||||
sudo apt install amd-smi-lib
|
||||
```
|
||||
|
||||
2. Add the installation directory to your PATH. If installed with ROCm, ignore
|
||||
this step.
|
||||
|
||||
```shell
|
||||
export PATH="${PATH:+${PATH}:}/opt/rocm/bin"
|
||||
```
|
||||
|
||||
3. Verify your installation.
|
||||
|
||||
```shell
|
||||
amd-smi --help
|
||||
```
|
||||
|
||||
## Optionally enable CLI autocompletion
|
||||
|
||||
The `amd-smi` CLI application supports autocompletion. If `argcomplete` is not
|
||||
installed and enabled already, do so using the following commands.
|
||||
|
||||
```shell
|
||||
python3 -m pip install argcomplete
|
||||
activate-global-python-argcomplete --user
|
||||
# restart shell to enable
|
||||
```
|
||||
|
||||
(install-manual-py-lib)=
|
||||
## Install the Python library for multiple ROCm instances
|
||||
|
||||
If {doc}`multiple ROCm versions are installed
|
||||
<rocm-install-on-linux:install/install-methods/multi-version-install-index>` and you
|
||||
are not using `pyenv`, uninstall previous versions of AMD SMI before installing
|
||||
the desired version from your ROCm instance.
|
||||
|
||||
### Manually install the Python library
|
||||
|
||||
The following are example AMD SMI installation steps on Ubuntu 22.04 without
|
||||
ROCm.
|
||||
|
||||
1. Remove previous AMD SMI installation.
|
||||
|
||||
```shell
|
||||
python3 -m pip list | grep amd
|
||||
python3 -m pip uninstall amdsmi
|
||||
```
|
||||
|
||||
2. Install the AMD SMI Python library from your target ROCm instance.
|
||||
|
||||
```shell
|
||||
apt install amd-smi-lib
|
||||
cd /opt/rocm/share/amd_smi
|
||||
python3 -m pip install --upgrade pip
|
||||
python3 -m pip install --user .
|
||||
```
|
||||
|
||||
3. You should now have the AMD SMI Python library in your Python path:
|
||||
|
||||
```shell-session
|
||||
~$ python3
|
||||
Python 3.8.10 (default, May 26 2023, 14:05:08)
|
||||
[GCC 9.4.0] on linux
|
||||
Type "help", "copyright", "credits" or "license" for more information.
|
||||
>>> import amdsmi
|
||||
>>>
|
||||
```
|
||||
@@ -0,0 +1,9 @@
|
||||
.. meta::
|
||||
:description: Review the AMD SMI license agreement.
|
||||
:keywords: amdsmi
|
||||
|
||||
*******
|
||||
License
|
||||
*******
|
||||
|
||||
.. include:: ../LICENSE
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Explore the AMD SMI C++ API."
|
||||
"keywords": "api, smi, lib, cpp, header, system, management, interface, ROCm"
|
||||
---
|
||||
|
||||
# AMD SMI C++ API reference
|
||||
|
||||
This section provides comprehensive documentation for the AMD SMI C++ API.
|
||||
Explore these sections to understand the full scope of available
|
||||
functionalities and how to implement them in your applications.
|
||||
|
||||
- {doc}`Modules <../doxygen/docBin/html/topics>`
|
||||
|
||||
- {doc}`Files <../doxygen/docBin/html/files>`
|
||||
|
||||
- {doc}`Globals <../doxygen/docBin/html/globals>`
|
||||
|
||||
- {doc}`Data structures <../doxygen/docBin/html/annotated>`
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "Explore the AMD SMI Go API."
|
||||
"keywords": "api, smi, lib, system, management, interface, ROCm, golang"
|
||||
---
|
||||
|
||||
# AMD SMI Go API reference
|
||||
|
||||
The AMD SMI Go interface provides a convenient way to interact with AMD
|
||||
hardware through a simple and accessible API. The API is compatible with Go
|
||||
version 1.20 and higher and requires the AMD driver to be loaded for
|
||||
initialization. Review the [prerequisites](#go_prereqs) before getting
|
||||
started.
|
||||
|
||||
This section provides documentation for the AMD SMI Go API. Explore these
|
||||
sections to understand the full scope of available functionalities and how to
|
||||
implement them in your applications.
|
||||
|
||||
## GPU functions
|
||||
|
||||
```{eval-rst}
|
||||
.. go-api-ref:: ../../goamdsmi.go
|
||||
:section: gpu
|
||||
```
|
||||
|
||||
## CPU functions
|
||||
|
||||
|
||||
```{eval-rst}
|
||||
.. go-api-ref:: ../../goamdsmi.go
|
||||
:section: cpu
|
||||
```
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,9 @@
|
||||
---
|
||||
myst:
|
||||
html_meta:
|
||||
"description lang=en": "A summary of changes to AMD SMI APIs. The changelog is listed for reference and subject to change."
|
||||
"keywords": "api, smi, lib, changes, system, management, interface, ROCm"
|
||||
---
|
||||
|
||||
```{include} ../../CHANGELOG.md
|
||||
```
|
||||
@@ -0,0 +1,68 @@
|
||||
# Variables of the form ${<variable>} are substituted, currently the following
|
||||
# list is supported:
|
||||
# - ${branch} (or {branch}) the name of the current branch
|
||||
# - ${url} (or {url}) github url of the current project
|
||||
# - ${project:<project_name>} base url of the documentation of <project_name>
|
||||
# based on intersphinx_mapping.
|
||||
# These comments will also be removed.
|
||||
defaults:
|
||||
numbered: false
|
||||
root: index
|
||||
subtrees:
|
||||
- caption: Install
|
||||
entries:
|
||||
- file: install/install.md
|
||||
title: Library and CLI tool installation
|
||||
- file: install/build.md
|
||||
title: Build from source
|
||||
|
||||
- caption: How to
|
||||
entries:
|
||||
- file: how-to/amdsmi-cpp-lib.md
|
||||
title: C++ library usage
|
||||
- file: how-to/amdsmi-py-lib.md
|
||||
title: Python library usage
|
||||
- file: how-to/amdsmi-go-lib.md
|
||||
title: Go library usage
|
||||
- file: how-to/amdsmi-cli-tool.md
|
||||
title: CLI tool usage
|
||||
- file: how-to/setup-docker-container.md
|
||||
title: Use AMD SMI in a Docker container
|
||||
|
||||
- caption: Reference
|
||||
entries:
|
||||
- file: reference/amdsmi-cpp-api.md
|
||||
title: C++ API
|
||||
entries:
|
||||
- file: doxygen/docBin/html/topics
|
||||
title: Modules
|
||||
- file: doxygen/docBin/html/files
|
||||
title: Files
|
||||
- file: doxygen/docBin/html/globals
|
||||
title: Globals
|
||||
- file: doxygen/docBin/html/annotated
|
||||
title: Data structures
|
||||
- file: doxygen/docBin/html/functions_data_fields
|
||||
title: Data fields
|
||||
- file: reference/amdsmi-py-api.md
|
||||
title: Python API
|
||||
- file: reference/amdsmi-go-api.md
|
||||
title: Go API
|
||||
- file: reference/changelog.md
|
||||
title: Changelog
|
||||
|
||||
- caption: Conceptual
|
||||
entries:
|
||||
- file: conceptual/ras.md
|
||||
|
||||
- caption: Tutorials
|
||||
entries:
|
||||
- url: https://github.com/ROCm/amdsmi/tree/${branch}/example
|
||||
title: AMD SMI examples (GitHub)
|
||||
- url: https://rocm.blogs.amd.com/software-tools-optimization/amd-smi-overview/README.html
|
||||
title: AMD SMI CLI walkthrough
|
||||
|
||||
- caption: About
|
||||
entries:
|
||||
- file: license.md
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
rocm-docs-core[api_reference]==1.27.0
|
||||
@@ -0,0 +1,313 @@
|
||||
#
|
||||
# This file is autogenerated by pip-compile with Python 3.12
|
||||
# by the following command:
|
||||
#
|
||||
# pip-compile docs/sphinx/requirements.in
|
||||
#
|
||||
accessible-pygments==0.0.5
|
||||
# via pydata-sphinx-theme
|
||||
alabaster==1.0.0
|
||||
# via sphinx
|
||||
asttokens==3.0.0
|
||||
# via stack-data
|
||||
attrs==25.3.0
|
||||
# via
|
||||
# jsonschema
|
||||
# jupyter-cache
|
||||
# referencing
|
||||
babel==2.17.0
|
||||
# via
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
beautifulsoup4==4.13.5
|
||||
# via pydata-sphinx-theme
|
||||
breathe==4.36.0
|
||||
# via rocm-docs-core
|
||||
certifi==2025.8.3
|
||||
# via requests
|
||||
cffi==2.0.0
|
||||
# via
|
||||
# cryptography
|
||||
# pynacl
|
||||
charset-normalizer==3.4.3
|
||||
# via requests
|
||||
click==8.3.0
|
||||
# via
|
||||
# click-log
|
||||
# doxysphinx
|
||||
# jupyter-cache
|
||||
# sphinx-external-toc
|
||||
click-log==0.4.0
|
||||
# via doxysphinx
|
||||
comm==0.2.3
|
||||
# via ipykernel
|
||||
contourpy==1.3.3
|
||||
# via matplotlib
|
||||
cryptography==46.0.1
|
||||
# via pyjwt
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
debugpy==1.8.17
|
||||
# via ipykernel
|
||||
decorator==5.2.1
|
||||
# via ipython
|
||||
docutils==0.21.2
|
||||
# via
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
doxysphinx==3.3.12
|
||||
# via rocm-docs-core
|
||||
executing==2.2.1
|
||||
# via stack-data
|
||||
fastjsonschema==2.21.2
|
||||
# via
|
||||
# nbformat
|
||||
# rocm-docs-core
|
||||
fonttools==4.60.0
|
||||
# via matplotlib
|
||||
gitdb==4.0.12
|
||||
# via gitpython
|
||||
gitpython==3.1.45
|
||||
# via rocm-docs-core
|
||||
greenlet==3.2.4
|
||||
# via sqlalchemy
|
||||
idna==3.10
|
||||
# via requests
|
||||
imagesize==1.4.1
|
||||
# via sphinx
|
||||
importlib-metadata==8.7.0
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
ipykernel==6.30.1
|
||||
# via myst-nb
|
||||
ipython==9.5.0
|
||||
# via
|
||||
# ipykernel
|
||||
# myst-nb
|
||||
ipython-pygments-lexers==1.1.1
|
||||
# via ipython
|
||||
jedi==0.19.2
|
||||
# via ipython
|
||||
jinja2==3.1.6
|
||||
# via
|
||||
# myst-parser
|
||||
# sphinx
|
||||
jsonschema==4.25.1
|
||||
# via nbformat
|
||||
jsonschema-specifications==2025.9.1
|
||||
# via jsonschema
|
||||
jupyter-cache==1.0.1
|
||||
# via myst-nb
|
||||
jupyter-client==8.6.3
|
||||
# via
|
||||
# ipykernel
|
||||
# nbclient
|
||||
jupyter-core==5.8.1
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
# nbclient
|
||||
# nbformat
|
||||
kiwisolver==1.4.9
|
||||
# via matplotlib
|
||||
libsass==0.22.0
|
||||
# via doxysphinx
|
||||
lxml==5.2.1
|
||||
# via doxysphinx
|
||||
markdown-it-py==3.0.0
|
||||
# via
|
||||
# mdit-py-plugins
|
||||
# myst-parser
|
||||
markupsafe==3.0.2
|
||||
# via jinja2
|
||||
matplotlib==3.10.6
|
||||
# via doxysphinx
|
||||
matplotlib-inline==0.1.7
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
mdit-py-plugins==0.5.0
|
||||
# via myst-parser
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
mpire==2.10.2
|
||||
# via doxysphinx
|
||||
myst-nb==1.3.0
|
||||
# via rocm-docs-core
|
||||
myst-parser==4.0.1
|
||||
# via myst-nb
|
||||
nbclient==0.10.2
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
nbformat==5.10.4
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# nbclient
|
||||
nest-asyncio==1.6.0
|
||||
# via ipykernel
|
||||
numpy==1.26.4
|
||||
# via
|
||||
# contourpy
|
||||
# doxysphinx
|
||||
# matplotlib
|
||||
packaging==25.0
|
||||
# via
|
||||
# ipykernel
|
||||
# matplotlib
|
||||
# sphinx
|
||||
parso==0.8.5
|
||||
# via jedi
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
pillow==11.3.0
|
||||
# via matplotlib
|
||||
platformdirs==4.4.0
|
||||
# via jupyter-core
|
||||
prompt-toolkit==3.0.52
|
||||
# via ipython
|
||||
psutil==7.1.0
|
||||
# via ipykernel
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.3
|
||||
# via stack-data
|
||||
pycparser==2.23
|
||||
# via cffi
|
||||
pydata-sphinx-theme==0.16.1
|
||||
# via
|
||||
# rocm-docs-core
|
||||
# sphinx-book-theme
|
||||
pygithub==2.8.1
|
||||
# via rocm-docs-core
|
||||
pygments==2.19.2
|
||||
# via
|
||||
# accessible-pygments
|
||||
# ipython
|
||||
# ipython-pygments-lexers
|
||||
# mpire
|
||||
# pydata-sphinx-theme
|
||||
# sphinx
|
||||
pyjson5==1.6.9
|
||||
# via doxysphinx
|
||||
pyjwt[crypto]==2.10.1
|
||||
# via pygithub
|
||||
pynacl==1.6.0
|
||||
# via pygithub
|
||||
pyparsing==3.2.5
|
||||
# via
|
||||
# doxysphinx
|
||||
# matplotlib
|
||||
python-dateutil==2.9.0.post0
|
||||
# via
|
||||
# jupyter-client
|
||||
# matplotlib
|
||||
pyyaml==6.0.3
|
||||
# via
|
||||
# jupyter-cache
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# rocm-docs-core
|
||||
# sphinx-external-toc
|
||||
pyzmq==27.1.0
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
referencing==0.36.2
|
||||
# via
|
||||
# jsonschema
|
||||
# jsonschema-specifications
|
||||
requests==2.32.5
|
||||
# via
|
||||
# pygithub
|
||||
# sphinx
|
||||
rocm-docs-core[api-reference]==1.27.0
|
||||
# via -r requirements.in
|
||||
roman-numerals-py==3.1.0
|
||||
# via sphinx
|
||||
rpds-py==0.27.1
|
||||
# via
|
||||
# jsonschema
|
||||
# referencing
|
||||
six==1.17.0
|
||||
# via python-dateutil
|
||||
smmap==5.0.2
|
||||
# via gitdb
|
||||
snowballstemmer==3.0.1
|
||||
# via sphinx
|
||||
soupsieve==2.8
|
||||
# via beautifulsoup4
|
||||
sphinx==8.2.3
|
||||
# via
|
||||
# breathe
|
||||
# myst-nb
|
||||
# myst-parser
|
||||
# pydata-sphinx-theme
|
||||
# rocm-docs-core
|
||||
# sphinx-book-theme
|
||||
# sphinx-copybutton
|
||||
# sphinx-design
|
||||
# sphinx-external-toc
|
||||
# sphinx-notfound-page
|
||||
sphinx-book-theme==1.1.3
|
||||
# via rocm-docs-core
|
||||
sphinx-copybutton==0.5.2
|
||||
# via rocm-docs-core
|
||||
sphinx-design==0.6.1
|
||||
# via rocm-docs-core
|
||||
sphinx-external-toc==1.0.1
|
||||
# via rocm-docs-core
|
||||
sphinx-notfound-page==1.1.0
|
||||
# via rocm-docs-core
|
||||
sphinxcontrib-applehelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.1.0
|
||||
# via sphinx
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-qthelp==2.0.0
|
||||
# via sphinx
|
||||
sphinxcontrib-serializinghtml==2.0.0
|
||||
# via sphinx
|
||||
sqlalchemy==2.0.43
|
||||
# via jupyter-cache
|
||||
stack-data==0.6.3
|
||||
# via ipython
|
||||
tabulate==0.9.0
|
||||
# via jupyter-cache
|
||||
tornado==6.5.2
|
||||
# via
|
||||
# ipykernel
|
||||
# jupyter-client
|
||||
tqdm==4.67.1
|
||||
# via mpire
|
||||
traitlets==5.14.3
|
||||
# via
|
||||
# ipykernel
|
||||
# ipython
|
||||
# jupyter-client
|
||||
# jupyter-core
|
||||
# matplotlib-inline
|
||||
# nbclient
|
||||
# nbformat
|
||||
typing-extensions==4.15.0
|
||||
# via
|
||||
# beautifulsoup4
|
||||
# myst-nb
|
||||
# pydata-sphinx-theme
|
||||
# pygithub
|
||||
# referencing
|
||||
# sqlalchemy
|
||||
urllib3==2.5.0
|
||||
# via
|
||||
# pygithub
|
||||
# requests
|
||||
wcwidth==0.2.14
|
||||
# via prompt-toolkit
|
||||
zipp==3.23.0
|
||||
# via importlib-metadata
|
||||
@@ -0,0 +1,63 @@
|
||||
# Builds the AMD SMI example programs against an installed amd_smi package.
# The executables are compiled but not installed; they are meant to be run
# from the build directory.
cmake_minimum_required(VERSION 3.20)

option(ENABLE_ESMI_LIB "Build ESMI Library" ON)
option(CMAKE_VERBOSE_MAKEFILE "Enable verbose output" ON)
option(CMAKE_EXPORT_COMPILE_COMMANDS "Export compile commands for linters and autocompleters" ON)

# Compiler flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -m64 -msse -msse2")

if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
endif()

set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ standard to use")
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

project(amd_smi_example)

# The compiler is only identified once project() has enabled the languages, so
# the minimum-version guard must run after it. Key the guard on the C++
# compiler, since that is the compiler whose version is being compared.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)
    message("Compiler version is " ${CMAKE_CXX_COMPILER_VERSION})
    message(FATAL_ERROR "Require at least gcc-5.4.0")
endif()

# required variables
if(DEFINED ENV{ROCM_PATH})
    set(ROCM_DIR "$ENV{ROCM_PATH}" CACHE STRING "ROCm directory.")
else()
    set(ROCM_DIR "/opt/rocm" CACHE STRING "ROCm directory.")
endif()

include(GNUInstallDirs)

# add package search paths
# ../../../ should resolve to /opt/rocm or another rocm install path
# fall back to ROCM_DIR
list(APPEND CMAKE_PREFIX_PATH ../../../ ${ROCM_DIR})
list(APPEND CMAKE_LIBRARY_PATH ${ROCM_DIR}/${CMAKE_INSTALL_LIBDIR})

find_package(amd_smi CONFIG REQUIRED)

message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message("                    Finished Cmake Example                         ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")

# compile example files but do not install
# this is only useful if running from build directory
set(SMI_DRM_EXAMPLE_EXE "amd_smi_drm_ex")
add_executable(${SMI_DRM_EXAMPLE_EXE} "amd_smi_drm_example.cc")
target_link_libraries(${SMI_DRM_EXAMPLE_EXE} amd_smi)

set(SMI_NODRM_EXAMPLE_EXE "amd_smi_nodrm_ex")
add_executable(${SMI_NODRM_EXAMPLE_EXE} "amd_smi_nodrm_example.cc")
target_link_libraries(${SMI_NODRM_EXAMPLE_EXE} amd_smi)

# The ESMI integration example is only built when ESMI support is requested.
if(ENABLE_ESMI_LIB)
    set(ESMI_SAMPLE_EXE "amd_smi_esmi_ex")
    add_executable(${ESMI_SAMPLE_EXE} "amdsmi_esmi_intg_example.cc")
    target_link_libraries(${ESMI_SAMPLE_EXE} amd_smi)
    target_compile_definitions(${ESMI_SAMPLE_EXE} PUBLIC ENABLE_ESMI_LIB)
endif()
|
||||
@@ -0,0 +1,49 @@
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from amdsmi import *
|
||||
import os
|
||||
|
||||
amdsmi_init()
|
||||
|
||||
def amdsmi_get_afids_from_cper():
    """Print the AFIDs decoded from every ``.bin`` file in ``/tmp/cper_dump/``.

    Each regular file in the directory whose name ends with ``.bin`` is read
    in binary mode and its raw bytes are passed to
    ``amdsmi_interface.amdsmi_get_afids_from_cper``. The returned AFID list
    is printed; nothing is returned to the caller.
    """
    directory_path = "/tmp/cper_dump/"
    print(f"Searching for cper file in {directory_path}")
    with os.scandir(directory_path) as cper_files:
        for cper_file in cper_files:
            # Skip subdirectories, and match on the file-name suffix rather
            # than a substring of the whole path so that names such as
            # "foo.bin.txt" are not mistaken for CPER dumps.
            if not (cper_file.is_file() and cper_file.name.endswith(".bin")):
                continue
            print(f"Found {cper_file.path}")
            with open(cper_file.path, "rb") as file:
                raw = file.read()
            # The library call returns (afids, count); only the list is shown.
            afids, _num_afids = amdsmi_interface.amdsmi_get_afids_from_cper(raw)
            print(f"afids: {afids}")
|
||||
|
||||
amdsmi_get_afids_from_cper()
|
||||
|
||||
"""
|
||||
Sample output:
|
||||
|
||||
sudo python3 afid.py
|
||||
Searching for cper file in /tmp/cper_dump/
|
||||
Found /tmp/cper_dump/cper_entry_0.bin
|
||||
afids: [17]
|
||||
Found /tmp/cper_dump/cper_entry_1.bin
|
||||
afids: [17]
|
||||
"""
|
||||
@@ -0,0 +1,126 @@
|
||||
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from amdsmi import *
|
||||
import os
|
||||
|
||||
amdsmi_init()
|
||||
|
||||
def get_severity_mask(severity):
    """Translate a severity name into the CPER severity bit mask.

    Bit positions follow the CPER severity values: bit 0 is
    AMDSMI_CPER_SEV_NON_FATAL_UNCORRECTED, bit 1 is AMDSMI_CPER_SEV_FATAL and
    bit 2 is AMDSMI_CPER_SEV_NON_FATAL_CORRECTED.

    Args:
        severity: "all", "fatal", "nonfatal", "nonfatal-uncorrected",
            "nonfatal-corrected" or "corrected".

    Returns:
        The mask for the given name, or 0 when the name is not recognized.
    """
    non_fatal_uncorrected = 1 << 0
    fatal = 1 << 1
    non_fatal_corrected = 1 << 2

    # (accepted names, resulting mask), checked in order.
    alias_table = (
        (("all",), non_fatal_uncorrected | fatal | non_fatal_corrected),
        (("fatal",), fatal),
        (("nonfatal", "nonfatal-uncorrected"), non_fatal_uncorrected),
        (("nonfatal-corrected", "corrected"), non_fatal_corrected),
    )
    for names, mask in alias_table:
        if severity in names:
            return mask
    return 0
|
||||
|
||||
def gpuid(device):
    """Return the position of `device` in amdsmi_get_processor_handles().

    Handles are compared through their `.value` attribute. The index of the
    first match is returned; None is returned when no handle matches.
    """
    handles = amdsmi_interface.amdsmi_get_processor_handles()
    matching_indexes = (
        position
        for position, candidate in enumerate(handles)
        if device.value == candidate.value
    )
    return next(matching_indexes, None)
|
||||
|
||||
def dump_cper_entry(entry, cper_data, key):
    """Write one CPER entry under /tmp/cper_dump as raw bytes plus JSON metadata.

    Two files are produced: cper_entry_<key>.bin holding the first
    cper_data[key]["size"] items of cper_data[key]["bytes"] (each reduced
    modulo 256), and cper_entry_<key>.json holding `entry` as JSON.

    Args:
        entry: metadata mapping of the entry; bytes values are decoded as
            UTF-8 (with replacement) so the result is valid JSON.
        cper_data: mapping indexed by `key`; each item provides "size" and
            "bytes".
        key: index of the entry, used for the lookup and in the file names.

    Raises:
        KeyError: if `key`, "size" or "bytes" is missing from `cper_data`.
        OSError: if the directory or the files cannot be created.
    """
    import json  # local import: only this helper needs it

    dump_dir = "/tmp/cper_dump"
    os.makedirs(dump_dir, mode=0o777, exist_ok=True)

    # Build the payload before opening the file so that a bad key does not
    # leave an empty .bin file behind.
    size = cper_data[key]["size"]
    payload = bytes(x % 256 for x in cper_data[key]["bytes"][:size])

    cper_file = f"{dump_dir}/cper_entry_{key}.bin"
    with open(cper_file, "wb") as file:
        file.write(payload)
    print(f" Wrote cper data to file: {cper_file}")

    def _jsonable(value):
        # json cannot serialize bytes (e.g. b'CPER'); store them as text.
        if isinstance(value, (bytes, bytearray)):
            return value.decode("utf-8", errors="replace")
        return str(value)

    json_file = f"{dump_dir}/cper_entry_{key}.json"
    with open(json_file, "wt") as file:
        # str(entry) would write a Python repr, which is not valid JSON.
        json.dump(entry, file, default=_jsonable)
|
||||
|
||||
def get_gpu_cper_entries():
    """Page through CPER entries, printing each one and dumping it to disk.

    Entries are requested with severity "all" and a buffer size of 1024*100,
    starting at cursor 0. Pages are fetched until the cursor returned by
    amdsmi_get_gpu_cper_entries() stops advancing. Every entry is printed and
    passed to dump_cper_entry(). An AmdSmiException is caught and printed.
    """
    try:
        handles = amdsmi_interface.amdsmi_get_processor_handles()
        page_bytes = 1024*100
        cursor = 0
        severity = "all"
        for handle in handles:
            while True:
                page, next_cursor, raw_records, _status = amdsmi_get_gpu_cper_entries(
                    handle, get_severity_mask(severity), page_bytes, cursor)
                gpu_index = gpuid(handle)
                print("#############################################################################")
                print(f"cper entries for severity: '{severity}', gpu #{gpu_index}, cursor: {cursor}-{next_cursor - 1}")
                for key, entry in page.items():
                    print("----------------")
                    print("Entry", cursor + key)
                    print(" Error Severity:", entry.get("error_severity", "Unknown"))
                    print(" Notify Type:", entry.get("notify_type", "Unknown"))
                    print(" Timestamp:", entry.get("timestamp", ""))
                    print(f" Cper entry metadata: {entry}")
                    dump_cper_entry(entry, raw_records, key)
                # An unchanged cursor means the last page has been read.
                if next_cursor == cursor:
                    break
                cursor = next_cursor
            # NOTE(review): this break ends the device loop after the first
            # handle, so only one GPU is ever queried - confirm this is intended.
            break
    except AmdSmiException as e:
        print(e)
|
||||
|
||||
get_gpu_cper_entries()
|
||||
|
||||
"""
|
||||
Sample output:
|
||||
|
||||
cper entries for severity: 'all', gpu #0, cursor: 0-3
|
||||
----------------
|
||||
Entry 0
|
||||
Error Severity: non_fatal_corrected
|
||||
Notify Type: CMC
|
||||
Timestamp: 2025/09/07 00:14:22
|
||||
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/07 00:14:22', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:1', 'flags': 0, 'persistence_info': 0}
|
||||
Wrote cper data to file: /tmp/cper_dump/cper_entry_0.bin
|
||||
----------------
|
||||
Entry 1
|
||||
Error Severity: non_fatal_corrected
|
||||
Notify Type: CMC
|
||||
Timestamp: 2025/09/07 00:14:26
|
||||
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/07 00:14:26', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:2', 'flags': 0, 'persistence_info': 0}
|
||||
Wrote cper data to file: /tmp/cper_dump/cper_entry_1.bin
|
||||
----------------
|
||||
Entry 2
|
||||
Error Severity: non_fatal_corrected
|
||||
Notify Type: CMC
|
||||
Timestamp: 2025/09/08 06:12:11
|
||||
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/08 06:12:11', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:3', 'flags': 0, 'persistence_info': 0}
|
||||
Wrote cper data to file: /tmp/cper_dump/cper_entry_2.bin
|
||||
----------------
|
||||
Entry 3
|
||||
Error Severity: non_fatal_corrected
|
||||
Notify Type: CMC
|
||||
Timestamp: 2025/09/08 06:13:59
|
||||
Cper entry metadata: {'error_severity': 'non_fatal_corrected', 'notify_type': 'CMC', 'timestamp': '2025/09/08 06:13:59', 'signature': b'CPER', 'revision': 256, 'signature_end': '0xffffffff', 'sec_cnt': 1, 'record_length': 472, 'platform_id': b'0x1002:0x74A2', 'creator_id': b'amdgpu', 'record_id': b'5:4', 'flags': 0, 'persistence_info': 0}
|
||||
Wrote cper data to file: /tmp/cper_dump/cper_entry_3.bin
|
||||
#############################################################################
|
||||
cper entries for severity: 'all', gpu #0, cursor: 4-3
|
||||
"""
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <pwd.h>
|
||||
#include <cinttypes>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
// Report a failed AMD SMI call on std::cout and return its status from the
// enclosing function, except for AMDSMI_STATUS_NOT_SUPPORTED and
// AMDSMI_STATUS_INVAL, which are reported but let execution continue.
//
// The expansion stays a plain brace block (not do { } while (0)) because call
// sites in this file invoke the macro without a trailing semicolon.
#define CHK_AMDSMI_RET(RET)                                                   \
  {                                                                           \
    /* Evaluate the argument exactly once. */                                 \
    const auto chk_status_ = (RET);                                           \
    if (chk_status_ != AMDSMI_STATUS_SUCCESS) {                               \
      /* Stays null if the lookup does not provide a message. */              \
      const char *chk_err_str_ = nullptr;                                     \
      amdsmi_status_code_to_string(chk_status_, &chk_err_str_);               \
      std::cout << "AMDSMI call returned " << chk_status_ << " at line "      \
                << __LINE__ << ": "                                           \
                << (chk_err_str_ ? chk_err_str_ : "unknown error")            \
                << std::endl;                                                 \
      if (chk_status_ != AMDSMI_STATUS_NOT_SUPPORTED &&                       \
          chk_status_ != AMDSMI_STATUS_INVAL) {                               \
        return chk_status_;                                                   \
      }                                                                       \
    }                                                                         \
  }
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
|
||||
// Init amdsmi for sockets and devices.
|
||||
// Here we are only interested in AMD_GPUS.
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
|
||||
// Get the socket count available for the system.
|
||||
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Allocate the memory for the sockets
|
||||
std::vector<amdsmi_socket_handle> sockets(socket_count);
|
||||
// Get the sockets of the system
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get identifier and devices
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
// Get Socket info
|
||||
char socket_info[128];
|
||||
ret = amdsmi_get_socket_info(sockets[i], 128, socket_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Socket " << socket_info << std::endl;
|
||||
|
||||
// Get the device count available for the socket.
|
||||
uint32_t device_count = 0;
|
||||
ret = amdsmi_get_processor_handles(sockets[i], &device_count, nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Allocate the memory for the device handlers on the socket
|
||||
std::vector<amdsmi_processor_handle> processor_handles(device_count);
|
||||
// Get all devices of the socket
|
||||
ret = amdsmi_get_processor_handles(sockets[i],
|
||||
&device_count, &processor_handles[0]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j = 0; j < device_count; j++) {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMD_SMI_INIT_AMD_GPUS, the processor_type must be AMDSMI_PROCESSOR_TYPE_AMD_GPU.
|
||||
processor_type_t processor_type = {};
|
||||
ret = amdsmi_get_processor_type(processor_handles[j], &processor_type);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (processor_type != AMDSMI_PROCESSOR_TYPE_AMD_GPU) {
|
||||
std::cout << "Expect AMDSMI_PROCESSOR_TYPE_AMD_GPU device type!\n";
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
amdsmi_ras_feature_t ras_feature;
|
||||
ret = amdsmi_get_gpu_ras_feature_info(
|
||||
processor_handles[j] ,&ras_feature);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
printf("\tras_feature: version: %x, schema: %x\n",
|
||||
ras_feature.ras_eeprom_version, ras_feature.ecc_correction_schema_flag);
|
||||
}
|
||||
|
||||
|
||||
amdsmi_bdf_t bdf = {};
|
||||
ret = amdsmi_get_gpu_device_bdf(processor_handles[j], &bdf);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_device_bdf:\n");
|
||||
printf("\tDevice[%d] BDF %04" PRIx64 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32 "\n\n", i,
|
||||
static_cast<uint64_t>(bdf.domain_number),
|
||||
static_cast<uint32_t>(bdf.bus_number),
|
||||
static_cast<uint32_t>(bdf.device_number),
|
||||
static_cast<uint32_t>(bdf.function_number));
|
||||
|
||||
amdsmi_asic_info_t asic_info = {};
|
||||
ret = amdsmi_get_gpu_asic_info(processor_handles[j], &asic_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_asic_info:\n");
|
||||
printf("\tMarket Name: %s\n", asic_info.market_name);
|
||||
printf("\tDeviceID: 0x%lx\n", asic_info.device_id);
|
||||
printf("\tVendorID: 0x%x\n", asic_info.vendor_id);
|
||||
printf("\tRevisionID: 0x%x\n", asic_info.rev_id);
|
||||
printf("\tSubSystemID: 0x%x\n", asic_info.subsystem_id);
|
||||
printf("\tAsic serial: 0x%s\n", asic_info.asic_serial);
|
||||
printf("\tOAM id: 0x%x\n", asic_info.oam_id);
|
||||
printf("\tNum of Computes: %d\n\n", asic_info.num_of_compute_units);
|
||||
|
||||
// Get VBIOS info
|
||||
amdsmi_vbios_info_t vbios_info = {};
|
||||
ret = amdsmi_get_gpu_vbios_info(processor_handles[j], &vbios_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_vbios_info:\n");
|
||||
printf("\tVBIOS/IFWI Name: %s\n", vbios_info.name);
|
||||
printf("\tVBIOS/IFWI Build Date: %s\n", vbios_info.build_date);
|
||||
printf("\tVBIOS/IFWI Part Number: %s\n", vbios_info.part_number);
|
||||
printf("\tVBIOS/IFWI Version String: %s\n\n", vbios_info.version);
|
||||
printf("\tVBIOS/IFWI Boot Firmware: %s\n\n", vbios_info.boot_firmware);
|
||||
|
||||
// Get engine usage info
|
||||
amdsmi_engine_usage_t engine_usage = {};
|
||||
ret = amdsmi_get_gpu_activity(processor_handles[j], &engine_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_activity:\n");
|
||||
printf("\tAverage GFX Activity: %d\n",
|
||||
engine_usage.gfx_activity);
|
||||
printf("\tAverage MM Activity: %d\n",
|
||||
engine_usage.mm_activity);
|
||||
printf("\tAverage UMC Activity: %d\n\n",
|
||||
engine_usage.umc_activity);
|
||||
|
||||
// Get firmware info
|
||||
amdsmi_fw_info_t fw_information = {};
|
||||
ret = amdsmi_get_fw_info(processor_handles[j], &fw_information);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_fw_info:\n");
|
||||
printf("\tFirmware version: %d\n", fw_information.num_fw_info);
|
||||
printf("\tSMU: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_SMU]
|
||||
.fw_version);
|
||||
printf("\tPM: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PM]
|
||||
.fw_version);
|
||||
printf("\tVCN: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_VCN]
|
||||
.fw_version);
|
||||
printf("\tCP_ME: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_ME]
|
||||
.fw_version);
|
||||
printf("\tCP_PFP: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_PFP]
|
||||
.fw_version);
|
||||
printf("\tCP_CE: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_CE]
|
||||
.fw_version);
|
||||
printf("\tRLC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC1: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_MEC1]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC2: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_CP_MEC2]
|
||||
.fw_version);
|
||||
printf("\tSDMA0: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_SDMA0]
|
||||
.fw_version);
|
||||
printf("\tMC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_MC]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST CNTL: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST GPM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST SRM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM]
|
||||
.fw_version);
|
||||
printf(
|
||||
"\tPSP SOSDRV: %ld\n\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PSP_SOSDRV]
|
||||
.fw_version);
|
||||
printf(
|
||||
"\tPLDM BUNDLE: %ld\n\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::AMDSMI_FW_ID_PLDM_BUNDLE]
|
||||
.fw_version);
|
||||
|
||||
// Get temperature measurements
|
||||
int64_t temp_measurements[AMDSMI_TEMPERATURE_TYPE__MAX + 1];
|
||||
amdsmi_temperature_type_t temp_types[4] = {
|
||||
AMDSMI_TEMPERATURE_TYPE_EDGE, AMDSMI_TEMPERATURE_TYPE_HOTSPOT,
|
||||
AMDSMI_TEMPERATURE_TYPE_VRAM, AMDSMI_TEMPERATURE_TYPE_PLX};
|
||||
for (const auto &temp_type : temp_types) {
|
||||
ret = amdsmi_get_temp_metric(
|
||||
processor_handles[j], temp_type,
|
||||
AMDSMI_TEMP_CURRENT,
|
||||
&temp_measurements[(int)(temp_type)]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
}
|
||||
printf(" Output of amdsmi_get_temp_metric:\n");
|
||||
printf("\tGPU Edge temp measurement: %ld\n",
|
||||
temp_measurements[AMDSMI_TEMPERATURE_TYPE_EDGE]);
|
||||
printf("\tGPU Hotspot temp measurement: %ld\n",
|
||||
temp_measurements[AMDSMI_TEMPERATURE_TYPE_HOTSPOT]);
|
||||
printf("\tGPU VRAM temp measurement: %ld\n",
|
||||
temp_measurements[AMDSMI_TEMPERATURE_TYPE_VRAM]);
|
||||
printf("\tGPU PLX temp measurement: %ld\n\n",
|
||||
temp_measurements[AMDSMI_TEMPERATURE_TYPE_PLX]);
|
||||
|
||||
// Get bad pages
|
||||
char bad_page_status_names[3][15] = {"RESERVED", "PENDING",
|
||||
"UNRESERVABLE"};
|
||||
uint32_t num_pages = 0;
|
||||
std::vector<amdsmi_retired_page_record_t> bad_page_info(num_pages);
|
||||
ret = amdsmi_get_gpu_bad_page_info(processor_handles[j], &num_pages,
|
||||
bad_page_info.data());
|
||||
std::cout << "num_pages = " << num_pages << "\n";
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_bad_page_info:\n");
|
||||
if (!num_pages) {
|
||||
printf("\tNo bad pages found.\n");
|
||||
} else {
|
||||
std::vector<amdsmi_retired_page_record_t> bad_page_info(num_pages);
|
||||
ret = amdsmi_get_gpu_bad_page_info(processor_handles[j], &num_pages,
|
||||
bad_page_info.data());
|
||||
CHK_AMDSMI_RET(ret)
|
||||
for (uint32_t page_it = 0; page_it < num_pages; page_it += 1) {
|
||||
printf(" Page[%d]\n", page_it);
|
||||
printf("\tAddress: %lu\n",
|
||||
bad_page_info[page_it].page_address);
|
||||
printf("\tSize: %lu\n", bad_page_info[page_it].page_size);
|
||||
printf(
|
||||
"\tStatus: %s\n",
|
||||
bad_page_status_names[bad_page_info[page_it].status]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Get ECC error counts
|
||||
amdsmi_error_count_t err_cnt_info = {};
|
||||
ret = amdsmi_get_gpu_total_ecc_count(processor_handles[j], &err_cnt_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_total_ecc_count:\n");
|
||||
printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
|
||||
printf("\tUncorrectable errors: %lu\n\n",
|
||||
err_cnt_info.uncorrectable_count);
|
||||
|
||||
// Get device name
|
||||
amdsmi_board_info_t board_info = {};
|
||||
ret = amdsmi_get_gpu_board_info(processor_handles[j], &board_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_board_info:\n");
|
||||
std::cout << "\tdevice [" << j
|
||||
<< "]\n\t\tProduct name: " << board_info.product_name
|
||||
<< "\n"
|
||||
<< "\t\tModel Number: " << board_info.model_number
|
||||
<< "\n"
|
||||
<< "\t\tBoard Serial: " << board_info.product_serial
|
||||
<< "\n"
|
||||
<< "\t\tManufacturer Name: " << board_info.manufacturer_name
|
||||
<< "\n\n";
|
||||
|
||||
// Get temperature
|
||||
int64_t val_i64 = 0;
|
||||
ret = amdsmi_get_temp_metric(processor_handles[j], AMDSMI_TEMPERATURE_TYPE_EDGE,
|
||||
AMDSMI_TEMP_CURRENT, &val_i64);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_temp_metric:\n");
|
||||
std::cout << "\t\tTemperature: " << val_i64 << "C"
|
||||
<< "\n\n";
|
||||
|
||||
// Get frame buffer
|
||||
amdsmi_vram_usage_t vram_usage = {};
|
||||
ret = amdsmi_get_gpu_vram_usage(processor_handles[j], &vram_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_vram_usage:\n");
|
||||
std::cout << "\t\tFrame buffer usage (MB): " << vram_usage.vram_used
|
||||
<< "/" << vram_usage.vram_total << "\n\n";
|
||||
|
||||
amdsmi_power_cap_info_t cap_info = {};
|
||||
ret = amdsmi_get_power_cap_info(processor_handles[j], 0, &cap_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_power_cap_info:\n");
|
||||
std::cout << "\t\t Power Cap: " << cap_info.power_cap / 1000000
|
||||
<< "W\n\n";
|
||||
|
||||
amdsmi_dpm_policy_t policy;
|
||||
ret = amdsmi_get_soc_pstate(processor_handles[j], &policy);
|
||||
if (ret != AMDSMI_STATUS_NOT_SUPPORTED) {
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\t amdsmi_get_soc_pstate total:" << policy.num_supported
|
||||
<<" current:" << policy.current << "\n";
|
||||
for (uint32_t x=0; x < policy.num_supported; x++) {
|
||||
std::cout << x <<": (" << policy.policies[x].policy_id
|
||||
<<"," << policy.policies[x].policy_description << ")\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Get nearest GPUs
|
||||
const char *topology_link_type_str[] = {
|
||||
"AMDSMI_LINK_TYPE_INTERNAL",
|
||||
"AMDSMI_LINK_TYPE_PCIE",
|
||||
"AMDSMI_LINK_TYPE_XGMI",
|
||||
"AMDSMI_LINK_TYPE_NOT_APPLICABLE",
|
||||
"AMDSMI_LINK_TYPE_UNKNOWN",
|
||||
};
|
||||
printf("\tOutput of amdsmi_get_link_topology_nearest:\n");
|
||||
for (uint32_t topo_link_type = AMDSMI_LINK_TYPE_INTERNAL; topo_link_type <= AMDSMI_LINK_TYPE_UNKNOWN; topo_link_type++) {
|
||||
auto topology_nearest_info = amdsmi_topology_nearest_t();
|
||||
ret = amdsmi_get_link_topology_nearest(processor_handles[j],
|
||||
static_cast<amdsmi_link_type_t>(topo_link_type),
|
||||
nullptr);
|
||||
CHK_AMDSMI_RET(ret);
|
||||
ret = amdsmi_get_link_topology_nearest(processor_handles[j],
|
||||
static_cast<amdsmi_link_type_t>(topo_link_type),
|
||||
&topology_nearest_info);
|
||||
CHK_AMDSMI_RET(ret);
|
||||
printf("\tNearest GPUs found at %s\n", topology_link_type_str[topo_link_type]);
|
||||
for (uint32_t k = 0; k < topology_nearest_info.count; k++) {
|
||||
amdsmi_bdf_t bdf = {};
|
||||
ret = amdsmi_get_gpu_device_bdf(topology_nearest_info.processor_list[k], &bdf);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf("\tGPU BDF %04" PRIx64 ":%02" PRIx32 ":%02" PRIx32 ".%" PRIu32 "\n",
|
||||
static_cast<uint64_t>(bdf.domain_number),
|
||||
static_cast<uint32_t>(bdf.bus_number),
|
||||
static_cast<uint32_t>(bdf.device_number),
|
||||
static_cast<uint32_t>(bdf.function_number));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
|
||||
// and devices pointers
|
||||
ret = amdsmi_shut_down();
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
|
||||
#define SHOWLINESZ 256
|
||||
|
||||
// Report a failed AMD SMI call on std::cout (status code, source line and the
// message from amdsmi_get_esmi_err_msg) and return the status from the
// enclosing function.
//
// The expansion stays a plain brace block (not do { } while (0)) because call
// sites in this file invoke the macro without a trailing semicolon.
#define CHK_AMDSMI_RET(RET)                                                   \
  {                                                                           \
    /* Evaluate the argument exactly once. */                                 \
    const auto chk_status_ = (RET);                                           \
    if (chk_status_ != AMDSMI_STATUS_SUCCESS) {                               \
      /* Stays null if the lookup does not provide a message. */              \
      const char *chk_err_str_ = nullptr;                                     \
      (void)amdsmi_get_esmi_err_msg(chk_status_, &chk_err_str_);              \
      /* Print the failing call's status, not the lookup's own result. */     \
      std::cout << "AMDSMI call returned " << chk_status_ << " at line "      \
                << __LINE__ << std::endl;                                     \
      std::cout << (chk_err_str_ ? chk_err_str_ : "unknown error")            \
                << std::endl;                                                 \
      return chk_status_;                                                     \
    }                                                                         \
  }
|
||||
|
||||
using std::cin;
|
||||
using std::cout;
|
||||
using std::endl;
|
||||
using std::fixed;
|
||||
using std::setprecision;
|
||||
using std::vector;
|
||||
|
||||
int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv) {
|
||||
amdsmi_status_t ret;
|
||||
uint32_t proto_ver;
|
||||
amdsmi_smu_fw_version_t smu_fw = {};
|
||||
|
||||
// Initialize esmi for AMD CPUs
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_CPUS);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
|
||||
ret = amdsmi_get_socket_handles(&socket_count, nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Allocate the memory for the sockets
|
||||
vector<amdsmi_socket_handle> sockets(socket_count);
|
||||
|
||||
// Get the sockets of the system
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets[0]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
cout << "Total Socket: " << socket_count << endl;
|
||||
|
||||
// For each socket, get cpus and cores
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
cout << endl << "Socket " << i << endl;
|
||||
uint32_t cpu_count = 0;
|
||||
uint32_t core_count = 0;
|
||||
|
||||
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU
|
||||
processor_type_t processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &cpu_count);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Allocate the memory for the cpus
|
||||
vector<amdsmi_processor_handle> plist(cpu_count);
|
||||
|
||||
// Get the cpus for each socket
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &plist[0], &cpu_count);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Set processor type as AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE
|
||||
processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, nullptr, &core_count);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Allocate the memory for the cpu cores
|
||||
vector<amdsmi_processor_handle> core_list(core_count);
|
||||
|
||||
// Get the cpu cores for each socket
|
||||
ret = amdsmi_get_processor_handles_by_type(sockets[i], processor_type, &core_list[0], &core_count);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
for (uint32_t index = 0; index < plist.size(); index++) {
|
||||
ret = amdsmi_get_cpu_hsmp_proto_ver(plist[index], &proto_ver);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get hsmp proto version"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
cout<<"\n------------------------------------------";
|
||||
cout<<"\n| HSMP Proto Version | "<< proto_ver <<"\t\t |"<< endl;
|
||||
cout<<"------------------------------------------\n";
|
||||
|
||||
ret = amdsmi_get_cpu_smu_fw_version(plist[index], &smu_fw);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get smu fw version"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
cout<<"\n------------------------------------------";
|
||||
cout<<"\n| SMU FW Version | "
|
||||
<<(unsigned)smu_fw.major<<"."
|
||||
<<(unsigned)smu_fw.minor<<"."
|
||||
<<(unsigned)smu_fw.debug
|
||||
<<"\t\t |"<<endl;
|
||||
cout<<"------------------------------------------\n";
|
||||
|
||||
uint32_t err_bits = 0;
|
||||
|
||||
uint32_t prochot;
|
||||
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
|
||||
cout<<"\n-------------------------------------------------";
|
||||
cout<<"\n| ProchotStatus:\t\t |";
|
||||
|
||||
ret = amdsmi_get_cpu_prochot_status(plist[index], &prochot);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get prochot status"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<setprecision(7)<< (prochot ? "active" : "inactive")<<"\t|";
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err:" <<ret<<" |";
|
||||
}
|
||||
cout<<"\n-------------------------------------------------\n";
|
||||
|
||||
size_t len;
|
||||
char str[SHOWLINESZ] = {};
|
||||
int retVal = 0;
|
||||
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
|
||||
cout<<"\n-------------------------------------------------";
|
||||
cout<<"\n| fclk (Mhz)\t\t\t |";
|
||||
retVal = snprintf(str, SHOWLINESZ, "\n| mclk (Mhz)\t\t\t |");
|
||||
|
||||
len = strlen(str);
|
||||
uint32_t fclk, mclk;
|
||||
err_bits = 0;
|
||||
|
||||
ret = amdsmi_get_cpu_fclk_mclk(plist[index], &fclk, &mclk);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu fclk mclk"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<setprecision(7)<<" "<<fclk<<"\t\t|";
|
||||
retVal = snprintf(str + len, SHOWLINESZ - len, " %d\t\t|", mclk);
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err: "<<setprecision(2)<<ret<<" |";
|
||||
retVal = snprintf(str + len, SHOWLINESZ - len, " NA (Err: %-2d) |", ret);
|
||||
}
|
||||
if (retVal > 0 && retVal < SHOWLINESZ)
|
||||
cout << str;
|
||||
else
|
||||
cout <<"error writing to buffer" << endl;
|
||||
|
||||
cout<<"\n-------------------------------------------------\n";
|
||||
|
||||
uint32_t socket_power;
|
||||
cout<<setprecision(3)<<" CPU "<<index<<"\t|";
|
||||
cout<<"\n-------------------------------------------------";
|
||||
cout<<"\n| Power (Watts)\t\t\t | ";
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power(plist[index], &socket_power);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu socket power"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<fixed<<setprecision(3)<<static_cast<double>(socket_power)/1000<<"\t|";
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err:" <<ret<<" |";
|
||||
}
|
||||
|
||||
uint32_t power_limit = 0;
|
||||
cout<<"\n| PowerLimit (Watts)\t\t | ";
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power_cap(plist[index], &power_limit);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<fixed<<setprecision(3)<<static_cast<double>(power_limit)/1000<<"\t|";
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err:" <<ret<<" |";
|
||||
}
|
||||
|
||||
uint32_t power_max = 0;
|
||||
cout<<"\n| PowerLimitMax (Watts)\t\t | ";
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power_cap_max(plist[index], &power_max);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu socket power cap max"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<fixed<<setprecision(3)<<static_cast<double>(power_max)/1000<<"\t|";
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err:" <<ret<<" |";
|
||||
}
|
||||
cout<<"\n-------------------------------------------------\n";
|
||||
|
||||
uint32_t input_power;
|
||||
power_max = 0;
|
||||
cout<<"\nEnter the max power to be set:\n";
|
||||
cin>>input_power;
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power_cap_max(plist[index], &power_max);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu socket power cap max"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if ((ret == AMDSMI_STATUS_SUCCESS) && (input_power > power_max)) {
|
||||
cout<<"Input power is more than max power limit,"
|
||||
" limiting to "<<static_cast<double>(power_max)/1000<<"Watts\n";
|
||||
input_power = power_max;
|
||||
}
|
||||
|
||||
ret = amdsmi_set_cpu_socket_power_cap(plist[index], input_power);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to set cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<"CPU ["<<index<<"] power_limit set to "
|
||||
<<fixed<<setprecision(3)<<static_cast<double>(input_power)/1000<<" Watts successfully\n";
|
||||
}
|
||||
|
||||
power_limit = 0;
|
||||
cout<<"\n| PowerLimit (Watts) \t\t | ";
|
||||
|
||||
ret = amdsmi_get_cpu_socket_power_cap(plist[index], &power_limit);
|
||||
if(ret != AMDSMI_STATUS_SUCCESS)
|
||||
cout<<"Failed to get cpu socket power cap"<<"["<<index<<"] , Err["<<ret<<"] "<< endl;
|
||||
|
||||
if (!ret) {
|
||||
cout<<fixed<<setprecision(3)<<static_cast<double>(power_limit)/1000<<"\t|";
|
||||
} else {
|
||||
err_bits |= 1 << ret;
|
||||
cout<<" NA (Err:" <<ret<<" |";
|
||||
}
|
||||
cout<<"\n-------------------------------------------------\n";
|
||||
|
||||
double fraction_q10 = 1/pow(2,10);
|
||||
double fraction_uq10 = fraction_q10;
|
||||
|
||||
amdsmi_hsmp_metrics_table_t mtbl = {};
|
||||
ret = amdsmi_get_hsmp_metrics_table(plist[index], &mtbl);
|
||||
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) {
|
||||
cout<<"Failed to get Metrics Table for CPU["<<index<<"], Err["<<ret<<"]" << endl;
|
||||
} else {
|
||||
cout<<"\n| METRICS TABLE \t\t\t\t |\n";
|
||||
|
||||
cout<<"\n| ACCUMULATOR COUNTER | "<<mtbl.accumulation_counter<<"\t\t|";
|
||||
cout<<"\n| SOCKET POWER LIMIT | "<<(mtbl.socket_power_limit * fraction_uq10)<<" W\t\t|";
|
||||
cout<<"\n| MAX SOCKET POWER LIMIT | "<<(mtbl.max_socket_power_limit * fraction_uq10)<<" W\t\t|";
|
||||
cout<<"\n| SOCKET POWER | "<<(mtbl.socket_power * fraction_uq10)<<" W\t\t|\n";
|
||||
|
||||
cout<<"\n| Effective frequency per AID: \t\t\t\t\t\t|";
|
||||
cout<<"\n-------------------------------------------------------------------------";
|
||||
cout<<"\n| AID | SOCCLK \t\t| VCLK \t\t| DCLK \t\t| LCLK \t\t|";
|
||||
cout<<"\n-------------------------------------------------------------------------";
|
||||
for(uint32_t j = 0; j < 4 ; j++){
|
||||
cout<<fixed<<setprecision(3)<<"\n| ["<<j<<"] | "
|
||||
<<(mtbl.socclk_frequency[j] * fraction_uq10)<<"MHz\t| "
|
||||
<<(mtbl.vclk_frequency[j] * fraction_uq10)<<"MHz\t| "
|
||||
<<(mtbl.dclk_frequency[j] * fraction_uq10)<<"MHz\t| "
|
||||
<<(mtbl.lclk_frequency[j] * fraction_uq10)<<"MHz\t| ";
|
||||
}
|
||||
cout<<"\n-------------------------------------------------------------------------\n";
|
||||
cout<<"\n-------------------------------------------------------------------------\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
// Clean up resources allocated at amdsmi_init
|
||||
ret = amdsmi_shut_down();
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,724 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package goamdsmi
|
||||
|
||||
/*
#cgo CFLAGS: -Wall -I/opt/rocm/include
#cgo LDFLAGS: -L/opt/rocm/lib -L/opt/rocm/lib64 -lgoamdsmi_shim64 -Wl,--unresolved-symbols=ignore-in-object-files
// cgo compiles this preamble as C, so use the C header <stdint.h>;
// <cstdint> is a C++-only header and is not found by the C compiler.
#include <stdint.h>
#include <amdsmi_go_shim.h>
*/
|
||||
import "C"
|
||||
|
||||
// ``GO_gpu_init`` initializes the GPU and reports whether the initialization was
|
||||
// successful. This function must be called before using other AMD SMI
|
||||
// functions.
|
||||
//
|
||||
// Output: ``bool``, returns true on success or false on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// GPU initialization is successful...
|
||||
// }
|
||||
func GO_gpu_init() (bool) {
|
||||
return bool(C.goamdsmi_gpu_init())
|
||||
}
|
||||
|
||||
// ``GO_gpu_shutdown`` shuts down the GPU and reports whether the shutdown was successful.
|
||||
//
|
||||
// Output: ``bool``, returns true on success or false on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_shutdown() {
|
||||
// GPU shutdown is successful...
|
||||
// }
|
||||
func GO_gpu_shutdown() (bool) {
|
||||
return bool(C.goamdsmi_gpu_shutdown())
|
||||
}
|
||||
|
||||
// ``GO_gpu_num_monitor_devices`` returns the number of GPU monitor devices
|
||||
// available.
|
||||
//
|
||||
// Output: ``uint``, returns the number of GPU monitor devices on success or 0 on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// }
|
||||
func GO_gpu_num_monitor_devices() (uint) {
|
||||
return uint(C.goamdsmi_gpu_num_monitor_devices())
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_name_get`` returns the name of the GPU device at the specified GPU
|
||||
// index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``char*``, returns GPU device name on success or "NA" on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// goamdsmi.GO_gpu_dev_name_get(i)
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_name_get(i int) (*C.char) {
|
||||
return C.goamdsmi_gpu_dev_name_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_id_get`` returns the device ID of the GPU device at the specified GPU
|
||||
// index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint16``, returns GPU device ID on success or ``0xFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// value16 := goamdsmi.GO_gpu_dev_id_get(i)
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_id_get(i int) (C.uint16_t) {
|
||||
return C.uint16_t(C.goamdsmi_gpu_dev_id_get(C.uint(i)))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_pci_id_get`` returns the device PCI ID of the device at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU devices PCI ID on success or ``0xFFFFFFFFFFFFFFFF``
|
||||
// on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// dev_pci_id := int(goamdsmi.GO_gpu_dev_pci_id_get(0))
|
||||
// }
|
||||
func GO_gpu_dev_pci_id_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_pci_id_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_vbios_version_get`` returns the VBIOS version of the GPU device at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``char*``, returns VBIOS version on success or "NA" on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// vbios_version := goamdsmi.GO_gpu_dev_vbios_version_get(0)
|
||||
// }
|
||||
func GO_gpu_dev_vbios_version_get(i int) (*C.char) {
|
||||
return C.goamdsmi_gpu_dev_vbios_version_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_vendor_name_get`` returns the vendor name of the GPU device at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``char*``, returns the GPU vendor name on success or "NA" on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// goamdsmi.GO_gpu_dev_vendor_name_get(i)
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_vendor_name_get(i int) (*C.char) {
|
||||
return C.goamdsmi_gpu_dev_vendor_name_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_power_cap_get`` returns the power cap of the GPU at the specified
|
||||
// GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU power cap on success or ``0xFFFFFFFFFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_power_cap := int(goamdsmi.GO_gpu_dev_power_cap_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_power_cap_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_power_cap_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_power_get`` returns the power of the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU power on success or ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_power := int(goamdsmi.GO_gpu_dev_power_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_power_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_power_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_temp_metric_get`` returns the temperature of the GPU at the
|
||||
// specified GPU index, sensor, and metric number.
|
||||
//
|
||||
// Input parameters:
|
||||
// - int, GPU index.
|
||||
// - int, sensor number.
|
||||
// - int, metric number.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU temperature on success or ``0xFFFFFFFFFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// temp := int(goamdsmi.GO_gpu_dev_temp_metric_get(i, 1, 0))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_temp_metric_get(i int, sensor int, metric int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_temp_metric_get(C.uint(i), C.uint(sensor), C.uint(metric))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_perf_level_get`` returns the perf level of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint32``, returns GPU perf level on success or ``0xFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_perf_level := int(goamdsmi.GO_gpu_dev_perf_level_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_perf_level_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_gpu_dev_perf_level_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_overdrive_level_get`` returns the overdrive level of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint32``, returns GPU overdrive level on success or ``0xFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_overdrive_level := int(goamdsmi.GO_gpu_dev_overdrive_level_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_overdrive_level_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_gpu_dev_perf_level_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_mem_overdrive_level_get`` returns the mem overdrive level of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint32``, returns GPU memory overdrive level on success or ``0xFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// mem_overdrive_level := int(goamdsmi.GO_gpu_dev_mem_overdrive_level_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_mem_overdrive_level_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_gpu_dev_overdrive_level_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_clk_freq_get_sclk`` returns the system clock (SCLK) frequency of
|
||||
// the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU SCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_sclk_freq := int(goamdsmi.GO_gpu_dev_gpu_clk_freq_get_sclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_clk_freq_get_sclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_clk_freq_get_mclk`` returns the memory clock (MCLK) frequency of
|
||||
// the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU MCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_mclk_freq := int(goamdsmi.GO_gpu_dev_gpu_clk_freq_get_mclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_clk_freq_get_mclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_od_volt_freq_range_min_get_sclk`` returns the minimum system clock
|
||||
// (SCLK) frequency of the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU minimum SCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_min_sclk := int(goamdsmi.GO_gpu_od_volt_freq_range_min_get_sclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_od_volt_freq_range_min_get_sclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_od_volt_freq_range_min_get_sclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_od_volt_freq_range_min_get_mclk`` returns the minimum memory clock
|
||||
// (MCLK) frequency of the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU minimum MCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_min_mclk := int(goamdsmi.GO_gpu_od_volt_freq_range_min_get_mclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_od_volt_freq_range_min_get_mclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_od_volt_freq_range_min_get_mclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_od_volt_freq_range_max_get_sclk`` returns the maximum system clock
|
||||
// (SCLK) frequency of the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU maximum SCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_max_sclk := int(goamdsmi.GO_gpu_od_volt_freq_range_max_get_sclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_od_volt_freq_range_max_get_sclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_od_volt_freq_range_max_get_sclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_od_volt_freq_range_max_get_mclk`` returns the maximum memory clock
|
||||
// (MCLK) frequency of the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU maximum MCLK frequency level on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_max_mclk := int(goamdsmi.GO_gpu_od_volt_freq_range_max_get_mclk(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_od_volt_freq_range_max_get_mclk(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_od_volt_freq_range_max_get_mclk(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_busy_percent_get`` returns the busy percentage of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint32``, returns GPU busy percentage on success or ``0xFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// dev_busy_perc := int(goamdsmi.GO_gpu_dev_gpu_busy_percent_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_busy_percent_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_busy_percent_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_memory_busy_percent_get`` returns the memory busy percentage of
|
||||
// the GPU at the specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU memory busy percentage on success or
|
||||
// ``0xFFFFFFFFFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// mem_busy_perc := int(goamdsmi.GO_gpu_dev_gpu_memory_busy_percent_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_memory_busy_percent_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_memory_busy_percent_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_memory_usage_get`` returns the memory usage of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU memory usage on success or ``0xFFFFFFFFFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// mem_usage := int(goamdsmi.GO_gpu_dev_gpu_memory_usage_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_memory_usage_get (i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_memory_usage_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_gpu_dev_gpu_memory_total_get`` returns the total memory of the GPU at the
|
||||
// specified GPU index.
|
||||
//
|
||||
// Input parameter: ``int``, GPU index.
|
||||
//
|
||||
// Output: ``uint64``, returns GPU total memory on success or ``0xFFFFFFFFFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_gpu_init() {
|
||||
// num_gpus := int(goamdsmi.GO_gpu_num_monitor_devices())
|
||||
// for i := 0; i < num_gpus; i++ {
|
||||
// mem_total := int(goamdsmi.GO_gpu_dev_gpu_memory_total_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_gpu_dev_gpu_memory_total_get (i int) (C.uint64_t) {
|
||||
return C.goamdsmi_gpu_dev_gpu_memory_total_get(C.uint(i))
|
||||
}
|
||||
|
||||
//CPU ESMI or AMDSMI calls
|
||||
|
||||
// ``GO_cpu_init`` initializes the CPU and reports whether the initialization was
|
||||
// successful.
|
||||
//
|
||||
// Output: ``bool``, returns true on success or false on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// CPU initialization is successful...
|
||||
// }
|
||||
func GO_cpu_init() (bool) {
|
||||
return bool(C.goamdsmi_cpu_init())
|
||||
}
|
||||
|
||||
// ``GO_cpu_number_of_sockets_get`` returns the number of available CPU sockets.
|
||||
//
|
||||
// Output: ``uint``, returns the number of CPU sockets on success or 0 on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
|
||||
// }
|
||||
func GO_cpu_number_of_sockets_get() (uint) {
|
||||
return uint(C.goamdsmi_cpu_number_of_sockets_get())
|
||||
}
|
||||
|
||||
// ``GO_cpu_number_of_threads_get`` returns the number of available CPU threads.
|
||||
//
|
||||
// Output: ``uint``, returns the number of CPU threads on success or 0 on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
|
||||
// }
|
||||
func GO_cpu_number_of_threads_get() (uint) {
|
||||
return uint(C.goamdsmi_cpu_number_of_threads_get())
|
||||
}
|
||||
|
||||
// ``GO_cpu_threads_per_core_get`` returns the thread count per available CPU core.
|
||||
//
|
||||
// Output: ``uint``, returns the CPU thread count on success or 0 on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_threads_per_core := int(goamdsmi.GO_cpu_threads_per_core_get())
|
||||
// }
|
||||
func GO_cpu_threads_per_core_get() (uint) {
|
||||
return uint(C.goamdsmi_cpu_threads_per_core_get())
|
||||
}
|
||||
|
||||
// ``GO_cpu_core_energy_get`` returns the CPU core energy for the specified thread
|
||||
// index.
|
||||
//
|
||||
// Input parameter: ``int``, thread index.
|
||||
//
|
||||
// Output: ``uint64``, returns CPU core energy on success or ``0xFFFFFFFFFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
|
||||
// for i := 0; i < num_threads; i++ {
|
||||
// core_energy := int(goamdsmi.GO_cpu_core_energy_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_core_energy_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_cpu_core_energy_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_cpu_core_boostlimit_get`` returns the CPU core boost limit for the specified
|
||||
// thread index.
|
||||
//
|
||||
// Input parameter: ``int``, thread index.
|
||||
//
|
||||
// Output: ``uint32``, returns CPU core boost limit on success or ``0xFFFFFFFF`` on
|
||||
// fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_threads := int(goamdsmi.GO_cpu_number_of_threads_get())
|
||||
// for i := 0; i < num_threads; i++ {
|
||||
// core_boost_limit := int(goamdsmi.GO_cpu_core_boostlimit_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_core_boostlimit_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_cpu_core_boostlimit_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_cpu_socket_energy_get`` returns the CPU socket energy for the specified
|
||||
// socket index.
|
||||
//
|
||||
// Input parameter: ``int``, socket index.
|
||||
//
|
||||
// Output: ``uint64``, returns socket energy level on success or ``0xFFFFFFFFFFFFFFFF``
|
||||
// on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
|
||||
// for i := 0; i < num_sockets; i++ {
|
||||
// socket_energy := int(goamdsmi.GO_cpu_socket_energy_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_socket_energy_get(i int) (C.uint64_t) {
|
||||
return C.goamdsmi_cpu_socket_energy_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_cpu_socket_power_get`` returns the socket power for the specified socket
|
||||
// index.
|
||||
//
|
||||
// Input parameter: ``int``, socket index.
|
||||
//
|
||||
// Output: ``uint32``, returns socket power on success or ``0xFFFFFFFF``
|
||||
// on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
|
||||
// for i := 0; i < num_sockets; i++ {
|
||||
// socket_power := int(goamdsmi.GO_cpu_socket_power_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_socket_power_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_cpu_socket_power_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_cpu_socket_power_cap_get`` returns the socket power cap for the specified
|
||||
// socket index.
|
||||
//
|
||||
// Input parameter: ``int``, socket index.
|
||||
//
|
||||
// Output: ``uint32``, returns socket power cap on success or ``0xFFFFFFFF``
|
||||
// on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
|
||||
// for i := 0; i < num_sockets; i++ {
|
||||
// socket_power_cap := int(goamdsmi.GO_cpu_socket_power_cap_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_socket_power_cap_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_cpu_socket_power_cap_get(C.uint(i))
|
||||
}
|
||||
|
||||
// ``GO_cpu_prochot_status_get`` returns the PROCHOT status for the specified
|
||||
// socket index.
|
||||
//
|
||||
// Input parameter: ``int``, socket index.
|
||||
//
|
||||
// Output: ``uint32``, returns PROCHOT status on success or ``0xFFFFFFFF`` on fail.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// import "github.com/ROCm/amdsmi"
|
||||
//
|
||||
// if true == goamdsmi.GO_cpu_init() {
|
||||
// num_sockets := int(goamdsmi.GO_cpu_number_of_sockets_get())
|
||||
// for i := 0; i < num_sockets; i++ {
|
||||
// prochot_status := int(goamdsmi.GO_cpu_prochot_status_get(i))
|
||||
// }
|
||||
// }
|
||||
func GO_cpu_prochot_status_get(i int) (C.uint32_t) {
|
||||
return C.goamdsmi_cpu_prochot_status_get(C.uint(i))
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (c) 2024, Advanced Micro Devices, Inc.
|
||||
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.5.0)
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
message(" CMake AMD goamdsmi_shim Library ")
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
set(AMDSMI_DIR "" CACHE PATH "path to amdsmi installation")
|
||||
|
||||
# Always define ENABLE_DEBUG_LEVEL for the compiler: 0 unless the caller
# configured a truthy level, in which case that level is used and announced.
if(NOT ENABLE_DEBUG_LEVEL)
    add_definitions(-DENABLE_DEBUG_LEVEL=0)
else()
    add_definitions(-DENABLE_DEBUG_LEVEL=${ENABLE_DEBUG_LEVEL})
    message("**** Enabling Debug Level=${ENABLE_DEBUG_LEVEL} ****")
endif()
|
||||
|
||||
set(GOAMDSMI_SHIM "goamdsmi_shim")
|
||||
set(GOAMDSMI_SHIM_LIB "goamdsmi")
|
||||
set(GOAMDSMI_SHIM_COMPONENT "lib${GOAMDSMI_SHIM}")
|
||||
set(GOAMDSMI_SHIM_TARGET "${GOAMDSMI_SHIM}64")
|
||||
|
||||
# The following default version values should be updated as appropriate for
|
||||
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
|
||||
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden
|
||||
# by git tags (through "git describe") when they are present.
|
||||
set(VERSION_MAJOR 1)
|
||||
set(VERSION_MINOR 0)
|
||||
set(VERSION_PATCH 0)
|
||||
set(VERSION_NUM_COMMIT 0)
|
||||
|
||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
|
||||
|
||||
set(${GOAMDSMI_SHIM}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${GOAMDSMI_SHIM}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(${GOAMDSMI_SHIM}_VERSION_PATCH "0")
|
||||
set(${GOAMDSMI_SHIM}_VERSION_BUILD "0")
|
||||
message("SOVERSION: ${SO_VERSION_STRING}")
|
||||
|
||||
project(${GOAMDSMI_SHIM_TARGET})
|
||||
|
||||
if(NOT DEFINED CPACK_PACKAGE_VENDOR)
|
||||
set(CPACK_PACKAGE_VENDOR "AMD")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CPACK_PACKAGE_CONTACT)
|
||||
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD CGO wrapper")
|
||||
endif()
|
||||
|
||||
if(NOT GOAMDSMI_SHIM_PACKAGE)
|
||||
set(GOAMDSMI_SHIM_PACKAGE goamdsmi_shim_lib64)
|
||||
endif()
|
||||
|
||||
set(CPACK_PACKAGE_FILE_NAME "${GOAMDSMI_SHIM_PACKAGE}-${SO_VERSION_STRING}")
|
||||
|
||||
## Compiler flags
|
||||
# Baseline C++ flags: warnings, position-independent 64-bit code, no RTTI,
# SSE/SSE2 and C++11.
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -fpic -fno-rtti -m64")
string(APPEND CMAKE_CXX_FLAGS " -msse -msse2 -std=c++11 ")
# For a 32-bit build, use this instead of -m64 above:
# string(APPEND CMAKE_CXX_FLAGS " -m32")

# Release builds are optimised; every other build type keeps debug
# information, disables optimisation and defines DEBUG.
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL Release)
    string(APPEND CMAKE_CXX_FLAGS " -ggdb -O0 -DDEBUG")
else()
    string(APPEND CMAKE_CXX_FLAGS " -O2")
endif()
|
||||
|
||||
set(go_amd_smi_headers)
|
||||
|
||||
set(go_amd_smi_sources)
|
||||
|
||||
set(go_amd_smi_includes)
|
||||
|
||||
add_subdirectory(smiwrapper)
|
||||
list(APPEND go_amd_smi_headers smiwrapper/goamdsmi.h ${go_amd_smi_headers})
|
||||
list(APPEND go_amd_smi_headers smiwrapper/amdsmi_go_shim.h ${go_amd_smi_headers})
|
||||
list(APPEND go_amd_smi_sources smiwrapper/amdsmi_go_shim.c)
|
||||
list(APPEND go_amd_smi_includes ${CMAKE_CURRENT_SOURCE_DIR}/smiwrapper)
|
||||
|
||||
add_library(${GOAMDSMI_SHIM_TARGET} SHARED ${go_amd_smi_sources} ${go_amd_smi_headers} ${go_amd_smi_includes})
|
||||
|
||||
target_link_libraries(${GOAMDSMI_SHIM_TARGET} pthread rt m)
|
||||
|
||||
target_link_libraries(${GOAMDSMI_SHIM_TARGET} amd_smi)
|
||||
target_link_libraries(${GOAMDSMI_SHIM_TARGET} -L${AMDSMI_DIR}/lib)
|
||||
target_link_libraries(${GOAMDSMI_SHIM_TARGET} -L${AMDSMI_DIR}/lib64)
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${GOAMDSMI_SHIM_TARGET} PROPERTY SOVERSION "${VERSION_MAJOR}")
|
||||
set_property(TARGET ${GOAMDSMI_SHIM_TARGET} PROPERTY VERSION "${SO_VERSION_STRING}")
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
# Strip the built shared library after linking in Release builds.
# $<TARGET_FILE:...> resolves to the library actually produced for the target,
# so the step does not depend on a hard-coded "lib<name>.so" file name or on
# the command's working directory.
if("${CMAKE_BUILD_TYPE}" STREQUAL Release)
    add_custom_command(
        TARGET ${GOAMDSMI_SHIM_TARGET} POST_BUILD
        COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${GOAMDSMI_SHIM_TARGET}>)
endif()
|
||||
|
||||
set(go_amd_smi_install_headers smiwrapper/goamdsmi.h smiwrapper/amdsmi_go_shim.h)
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
# CMAKE_INSTALL_LIBDIR is provided by GNUInstallDirs. Include it here so the
# library destination is defined even when this directory is configured on its
# own; values already set by a parent project or on the command line are kept.
include(GNUInstallDirs)

# Install the shim shared library under the platform's library directory.
install(
    TARGETS ${GOAMDSMI_SHIM_TARGET}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT ${GOAMDSMI_SHIM_COMPONENT})
# Install the public wrapper headers.
install(
    FILES ${go_amd_smi_install_headers}
    DESTINATION include)
|
||||
|
||||
include_directories(${go_amd_smi_includes})
|
||||
@@ -0,0 +1,14 @@
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Copyright (c) 2024, Advanced Micro Devices, Inc.
|
||||
|
||||
# Absolute paths of the shim's public headers, stored as internal cache
# entries so they remain visible outside this directory's variable scope.
set(go_amd_smi_headers ${CMAKE_CURRENT_SOURCE_DIR}/goamdsmi.h ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_go_shim.h
    CACHE INTERNAL "")

# Absolute path of the shim's single translation unit, exported the same way.
set(go_amd_smi_sources ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_go_shim.c CACHE INTERNAL "")

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${go_amd_smi_amdsmi_includes})

add_library(go_amd_smi_ OBJECT ${go_amd_smi_sources} ${go_amd_smi_headers})

### Shared libraries need PIC
# The object library created above is literally named "go_amd_smi_". The
# previous `${go_amd_smi_}` dereferenced a variable this file never sets, so
# the property was applied to an empty target list and PIC was not enabled.
set_property(TARGET go_amd_smi_ PROPERTY POSITION_INDEPENDENT_CODE 1)
|
||||
@@ -0,0 +1,661 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "amdsmi_go_shim.h"
|
||||
#include <amd_smi/amdsmi.h>
|
||||
#include <unistd.h>
|
||||
#define nullptr ((void*)0)
|
||||
|
||||
#define MAX_SOCKET_ACROSS_SYSTEM 4
|
||||
#define CPU_0 0
|
||||
#define GPU_SENSOR_0 0
|
||||
#define MAX_CPU_PER_SOCKET 4
|
||||
#define MAX_PHYSICALCORE_ACROSS_SYSTEM 384
|
||||
#define MAX_LOGICALCORE_ACROSS_SYSTEM 768
|
||||
#define MAX_GPU_DEVICE_ACROSS_SYSTEM 24
|
||||
#define MAX_GPU_POWER_FROM_DRIVER 0xFFFF
|
||||
|
||||
#define AMDSMI_DRIVER_NAME "AMDSMI"
|
||||
#define AMDSMI_LIB_FILE "/opt/rocm/lib/libamd_smi.so"
|
||||
#define AMDSMI_LIB64_FILE "/opt/rocm/lib64/libamd_smi.so"
|
||||
|
||||
#define AMDGPU_DRIVER_NAME "AMDGPUDriver"
|
||||
#define AMDGPU_INITSTATE_FILE "/sys/module/amdgpu/initstate"
|
||||
|
||||
#define AMDHSMP_DRIVER_NAME "AMDHSMPDriver"
|
||||
#define AMDHSMP_INITSTATE_FILE "/dev/hsmp"
|
||||
|
||||
static uint32_t num_apuSockets = GOAMDSMI_VALUE_0;
|
||||
static uint32_t num_cpuSockets = GOAMDSMI_VALUE_0;
|
||||
static uint32_t num_gpuSockets = GOAMDSMI_VALUE_0;
|
||||
static uint32_t cpuInitCompleted = false;
|
||||
static uint32_t gpuInitCompleted = false;
|
||||
static uint32_t apuInitCompleted = false;
|
||||
|
||||
static uint32_t num_cpu_inAllSocket = GOAMDSMI_VALUE_0;
|
||||
static uint32_t num_cpu_physicalCore_inAllSocket = GOAMDSMI_VALUE_0;
|
||||
static uint32_t num_gpu_devices_inAllSocket = GOAMDSMI_VALUE_0;
|
||||
|
||||
static amdsmi_socket_handle amdsmi_apusocket_handle_all_socket[MAX_SOCKET_ACROSS_SYSTEM+MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
|
||||
static amdsmi_socket_handle amdsmi_cpusocket_handle_all_socket[MAX_SOCKET_ACROSS_SYSTEM] = {0};
|
||||
static amdsmi_socket_handle amdsmi_gpusocket_handle_all_socket[MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
|
||||
static amdsmi_processor_handle amdsmi_processor_handle_all_cpu_across_socket[MAX_SOCKET_ACROSS_SYSTEM*MAX_CPU_PER_SOCKET] = {0};
|
||||
static amdsmi_processor_handle amdsmi_processor_handle_all_cpu_physicalCore_across_socket[MAX_PHYSICALCORE_ACROSS_SYSTEM] = {0};
|
||||
static amdsmi_processor_handle amdsmi_processor_handle_all_gpu_device_across_socket[MAX_GPU_DEVICE_ACROSS_SYSTEM] = {0};
|
||||
|
||||
goamdsmi_status_t is_file_present(const char* driver_name, const char* file_name)
|
||||
{
|
||||
if(0 == access(file_name, F_OK))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, %s found \"%s\" and returns:%d\n", driver_name, file_name, GOAMDSMI_STATUS_SUCCESS);}
|
||||
return GOAMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, %s not found, missing \"%s\" and returns:%d\n", driver_name, file_name, GOAMDSMI_STATUS_FAILURE);}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
|
||||
goamdsmi_status_t check_amdgpu_driver()
|
||||
{
|
||||
return is_file_present(AMDGPU_DRIVER_NAME, AMDGPU_INITSTATE_FILE);
|
||||
}
|
||||
|
||||
goamdsmi_status_t check_hsmp_driver()
|
||||
{
|
||||
return is_file_present(AMDHSMP_DRIVER_NAME, AMDHSMP_INITSTATE_FILE);
|
||||
}
|
||||
|
||||
goamdsmi_status_t go_shim_amdsmiapu_init(goamdsmi_Init_t goamdsmi_Init)
|
||||
{
|
||||
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) && (true == cpuInitCompleted))
|
||||
{
|
||||
if((GOAMDSMI_VALUE_0 == num_cpuSockets)||(GOAMDSMI_VALUE_0 == num_cpu_inAllSocket)||(GOAMDSMI_VALUE_0 == num_cpu_physicalCore_inAllSocket))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, Returns previous enumurated AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Returns previous enumurated AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) && (true == gpuInitCompleted))
|
||||
{
|
||||
if((GOAMDSMI_VALUE_0 == num_gpuSockets)||(GOAMDSMI_VALUE_0 == num_gpu_devices_inAllSocket))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, Returns previous enumurated AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_gpuSockets, num_gpu_devices_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Returns previous enumurated AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_gpuSockets, num_gpu_devices_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ((GOAMDSMI_STATUS_SUCCESS == check_amdgpu_driver()) && (GOAMDSMI_STATUS_SUCCESS == check_hsmp_driver()))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified APU machine and going to enumurate APU\n");}
|
||||
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_init(AMDSMI_INIT_AMD_APUS)) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_socket_handles(&num_apuSockets, nullptr)) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_socket_handles(&num_apuSockets, &amdsmi_apusocket_handle_all_socket[0])) &&
|
||||
(GOAMDSMI_VALUE_0 != num_apuSockets))
|
||||
{
|
||||
cpuInitCompleted = true;
|
||||
gpuInitCompleted = true;
|
||||
apuInitCompleted = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, Identified APU machine ApuNumSockets=%d\n",num_apuSockets);}
|
||||
for(uint32_t socket_counter = 0; socket_counter < num_apuSockets; socket_counter++)
|
||||
{
|
||||
uint32_t num_cpu = GOAMDSMI_VALUE_0;
|
||||
uint32_t num_cpu_physicalCores = GOAMDSMI_VALUE_0;
|
||||
uint32_t num_gpu_devices = GOAMDSMI_VALUE_0;
|
||||
|
||||
//CPU
|
||||
processor_type_t cpu_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
|
||||
processor_type_t cpu_core_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_processor_type, nullptr, &num_cpu)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_cpu) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_processor_type, &amdsmi_processor_handle_all_cpu_across_socket[num_cpu_inAllSocket], &num_cpu)))
|
||||
{
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_core_processor_type, nullptr, &num_cpu_physicalCores)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_cpu_physicalCores) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], cpu_core_processor_type, &amdsmi_processor_handle_all_cpu_physicalCore_across_socket[num_cpu_physicalCore_inAllSocket], &num_cpu_physicalCores)))
|
||||
{
|
||||
num_cpu_physicalCore_inAllSocket = num_cpu_physicalCore_inAllSocket+num_cpu_physicalCores;
|
||||
}
|
||||
num_cpu_inAllSocket = num_cpu_inAllSocket+num_cpu;
|
||||
num_cpuSockets = num_cpuSockets+1;
|
||||
}
|
||||
|
||||
//GPU
|
||||
processor_type_t gpu_device_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU;
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], gpu_device_processor_type, nullptr, &num_gpu_devices)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_gpu_devices) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_apusocket_handle_all_socket[socket_counter], gpu_device_processor_type, &amdsmi_processor_handle_all_gpu_device_across_socket[num_gpu_devices_inAllSocket], &num_gpu_devices)))
|
||||
{
|
||||
num_gpu_devices_inAllSocket = num_gpu_devices_inAllSocket+num_gpu_devices;
|
||||
num_gpuSockets = num_gpuSockets+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(GOAMDSMI_CPU_INIT == goamdsmi_Init)
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Going to enumurate only CPU\n");}
|
||||
cpuInitCompleted = true;
|
||||
|
||||
if (GOAMDSMI_STATUS_SUCCESS == check_hsmp_driver())
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified CPU Driver and going to enumurate only CPU\n");}
|
||||
|
||||
if( (AMDSMI_STATUS_SUCCESS != amdsmi_init(AMDSMI_INIT_AMD_CPUS)) ||
|
||||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_cpuSockets, nullptr)) ||
|
||||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_cpuSockets, &amdsmi_cpusocket_handle_all_socket[0])) ||
|
||||
(GOAMDSMI_VALUE_0 == num_cpuSockets))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, AMDSMICPUInit:0, CpuNumSockets=0\n");}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_3)) {printf("AMDSMI, Status, Missing CPU Driver and not going to enumurate only CPU\n");}
|
||||
}
|
||||
//CPU
|
||||
for(uint32_t cpu_socket_counter = 0; cpu_socket_counter < num_cpuSockets; cpu_socket_counter++)
|
||||
{
|
||||
uint32_t num_cpu = GOAMDSMI_VALUE_0;
|
||||
uint32_t num_cpu_physicalCores = GOAMDSMI_VALUE_0;
|
||||
|
||||
processor_type_t cpu_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU;
|
||||
processor_type_t cpu_core_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE;
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_processor_type, nullptr, &num_cpu)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_cpu) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_processor_type, &amdsmi_processor_handle_all_cpu_across_socket[num_cpu_inAllSocket], &num_cpu)))
|
||||
{
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_core_processor_type, nullptr, &num_cpu_physicalCores)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_cpu_physicalCores) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_cpusocket_handle_all_socket[cpu_socket_counter], cpu_core_processor_type, &amdsmi_processor_handle_all_cpu_physicalCore_across_socket[num_cpu_physicalCore_inAllSocket], &num_cpu_physicalCores)))
|
||||
{
|
||||
num_cpu_physicalCore_inAllSocket = num_cpu_physicalCore_inAllSocket+num_cpu_physicalCores;
|
||||
}
|
||||
num_cpu_inAllSocket = num_cpu_inAllSocket+num_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(GOAMDSMI_GPU_INIT == goamdsmi_Init)
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Going to enumurate only GPU\n");}
|
||||
gpuInitCompleted = true;
|
||||
|
||||
if (GOAMDSMI_STATUS_SUCCESS == check_amdgpu_driver())
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Status, Identified GPU Driver and going to enumurate only GPU\n");}
|
||||
|
||||
if( (AMDSMI_STATUS_SUCCESS != amdsmi_init(AMDSMI_INIT_AMD_GPUS)) ||
|
||||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_gpuSockets, nullptr)) ||
|
||||
(AMDSMI_STATUS_SUCCESS != amdsmi_get_socket_handles(&num_gpuSockets, &amdsmi_gpusocket_handle_all_socket[0])) ||
|
||||
(GOAMDSMI_VALUE_0 == num_gpuSockets))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, AMDSMIGPUInit:0, GpuNumSockets=0\n");}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_3)) {printf("AMDSMI, Status, Missing GPU Driver and not going to enumurate only GPU\n");}
|
||||
}
|
||||
|
||||
//GPU
|
||||
for(uint32_t gpu_socket_counter = 0; gpu_socket_counter < num_gpuSockets; gpu_socket_counter++)
|
||||
{
|
||||
uint32_t num_gpu_devices = GOAMDSMI_VALUE_0;
|
||||
|
||||
processor_type_t gpu_device_processor_type = AMDSMI_PROCESSOR_TYPE_AMD_GPU;
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_gpusocket_handle_all_socket[gpu_socket_counter], gpu_device_processor_type, nullptr, &num_gpu_devices)) &&
|
||||
(GOAMDSMI_VALUE_0 != num_gpu_devices) &&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_processor_handles_by_type(amdsmi_gpusocket_handle_all_socket[gpu_socket_counter], gpu_device_processor_type, &amdsmi_processor_handle_all_gpu_device_across_socket[num_gpu_devices_inAllSocket], &num_gpu_devices)))
|
||||
{
|
||||
num_gpu_devices_inAllSocket = num_gpu_devices_inAllSocket+num_gpu_devices;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//CPU
|
||||
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) && ((GOAMDSMI_VALUE_0 == num_cpuSockets)||(GOAMDSMI_VALUE_0 == num_cpu_inAllSocket)||(GOAMDSMI_VALUE_0 == num_cpu_physicalCore_inAllSocket)))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, CPU Enumuration Failed AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", GOAMDSMI_STATUS_FAILURE, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
|
||||
//GPU
|
||||
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) && ((GOAMDSMI_VALUE_0 == num_gpuSockets)||(GOAMDSMI_VALUE_0 == num_gpu_devices_inAllSocket)))
|
||||
{
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed, GPU Enumuration Failed AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_FAILURE, num_gpuSockets, num_gpu_devices_inAllSocket);}
|
||||
return GOAMDSMI_STATUS_FAILURE;
|
||||
}
|
||||
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1))
|
||||
{
|
||||
if((GOAMDSMI_CPU_INIT == goamdsmi_Init) || apuInitCompleted) printf("AMDSMI, Status, AMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", GOAMDSMI_STATUS_SUCCESS, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);
|
||||
if((GOAMDSMI_GPU_INIT == goamdsmi_Init) || apuInitCompleted) printf("AMDSMI, Status, AMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", GOAMDSMI_STATUS_SUCCESS, num_gpuSockets, num_gpu_devices_inAllSocket);
|
||||
}
|
||||
|
||||
return GOAMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
////////////////////////////////////////////////------------CPU------------////////////////////////////////////////////////
|
||||
bool goamdsmi_cpu_init()
|
||||
{
|
||||
bool cpu_init_success = false;
|
||||
if(GOAMDSMI_STATUS_SUCCESS == go_shim_amdsmiapu_init(GOAMDSMI_CPU_INIT))
|
||||
{
|
||||
if((num_cpu_inAllSocket) && (num_cpu_physicalCore_inAllSocket)) cpu_init_success = true;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, InitAMDSMICPUInit:%d, CpuSocketCount:%d, CpuCount:%d, CpuPhysicalCoreCount:%d,\n", cpu_init_success?"Success":"Failed", cpu_init_success?1:0, num_cpuSockets, num_cpu_inAllSocket, num_cpu_physicalCore_inAllSocket);}
|
||||
return cpu_init_success;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_threads_per_core_get()
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t threads_per_core_temp = GOAMDSMI_VALUE_0;
|
||||
|
||||
if((AMDSMI_STATUS_SUCCESS == amdsmi_get_threads_per_core(&threads_per_core_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, CpuThreadsPerCore:%lu\n", readSuccess?"Success":"Failed", (unsigned long)(threads_per_core_temp));}
|
||||
|
||||
return threads_per_core_temp;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_number_of_threads_get()
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t number_of_threads = GOAMDSMI_VALUE_0;
|
||||
uint32_t num_threads_per_core = goamdsmi_cpu_threads_per_core_get();
|
||||
if(0 != num_threads_per_core)
|
||||
{
|
||||
readSuccess = true;
|
||||
number_of_threads = num_cpu_physicalCore_inAllSocket*num_threads_per_core;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, CpuNumThreads:%lu\n", readSuccess?"Success":"Failed", (unsigned long)(number_of_threads));}
|
||||
return number_of_threads;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_number_of_sockets_get()
|
||||
{
|
||||
uint32_t number_of_sockets = num_cpuSockets;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, CpuNumSockets:%lu\n", (unsigned long)(number_of_sockets));}
|
||||
return number_of_sockets;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_cpu_core_energy_get(uint32_t thread_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t core_energy_temp = GOAMDSMI_UINT64_MAX;
|
||||
uint32_t physicalCore_index = thread_index%num_cpu_physicalCore_inAllSocket;
|
||||
|
||||
if (AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_core_energy(amdsmi_processor_handle_all_cpu_physicalCore_across_socket[physicalCore_index], &core_energy_temp)) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Thread:%d PC:%d, CpuCoreEnergy:%llu, CpuCoreEnergyJoules:%.6f, CpuCoreEnergyKJoules:%.9f\n", readSuccess?"Success":"Failed", thread_index, physicalCore_index, (unsigned long long)(core_energy_temp), ((double)(core_energy_temp))/1000000, ((double)(core_energy_temp))/1000000000);}
|
||||
|
||||
return core_energy_temp;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_cpu_socket_energy_get(uint32_t socket_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t socket_energy_temp = GOAMDSMI_UINT64_MAX;
|
||||
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_energy(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_energy_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketEnergy:%llu, CpuSocketEnergyJoules:%.6f, CpuSocketEnergyKJoules:%.9f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long long)(socket_energy_temp), ((double)(socket_energy_temp))/1000000, ((double)(socket_energy_temp))/1000000000);}
|
||||
|
||||
return socket_energy_temp;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_prochot_status_get(uint32_t socket_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t prochot_temp = GOAMDSMI_UINT32_MAX;
|
||||
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_prochot_status(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &prochot_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuProchotStatus:%lu\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(prochot_temp));}
|
||||
|
||||
return prochot_temp;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_socket_power_get(uint32_t socket_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t socket_power_temp = GOAMDSMI_UINT32_MAX;
|
||||
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_power(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_power_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketPower:%lu, CpuSocketPowerWatt:%.3f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(socket_power_temp), ((double)(socket_power_temp))/1000);}
|
||||
|
||||
return socket_power_temp;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_socket_power_cap_get(uint32_t socket_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t socket_power_cap_temp = GOAMDSMI_UINT32_MAX;
|
||||
if ((AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_socket_power_cap(amdsmi_processor_handle_all_cpu_across_socket[socket_index], &socket_power_cap_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Socket:%d, CpuSocketPowerCap:%lu, CpuSocketPowerCapWatt:%.3f\n", readSuccess?"Success":"Failed", socket_index, (unsigned long)(socket_power_cap_temp), ((double)(socket_power_cap_temp))/1000);}
|
||||
|
||||
return socket_power_cap_temp;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_cpu_core_boostlimit_get(uint32_t thread_index)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t core_boostlimit_temp = GOAMDSMI_UINT32_MAX;
|
||||
uint32_t physicalCore_index = thread_index%num_cpu_physicalCore_inAllSocket;
|
||||
|
||||
if (AMDSMI_STATUS_SUCCESS == amdsmi_get_cpu_core_boostlimit(amdsmi_processor_handle_all_cpu_physicalCore_across_socket[physicalCore_index], &core_boostlimit_temp)) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Thread:%d PC:%d, CpuCoreBoostLimit:%lu\n", readSuccess?"Success":"Failed", thread_index, physicalCore_index, (unsigned long)(core_boostlimit_temp));}
|
||||
|
||||
return core_boostlimit_temp;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////------------GPU------------////////////////////////////////////////////////
|
||||
bool goamdsmi_gpu_init()
|
||||
{
|
||||
bool gpu_init_success = false;
|
||||
if(GOAMDSMI_STATUS_SUCCESS == go_shim_amdsmiapu_init(GOAMDSMI_GPU_INIT))
|
||||
{
|
||||
if((num_gpu_devices_inAllSocket)) gpu_init_success = true;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s, InitAMDSMIGPUInit:%d, GpuSocketCount:%d, GpuCount:%d\n", gpu_init_success?"Success":"Failed", gpu_init_success?1:0, num_gpuSockets, num_gpu_devices_inAllSocket);}
|
||||
|
||||
return gpu_init_success;
|
||||
}
|
||||
|
||||
/* Placeholder: performs no shutdown work and always returns false. */
bool goamdsmi_gpu_shutdown()
{
    const bool shutdown_done = false;
    return shutdown_done;
}
|
||||
|
||||
uint32_t goamdsmi_gpu_num_monitor_devices()
|
||||
{
|
||||
uint32_t gpu_num_monitor_devices = num_gpu_devices_inAllSocket;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success, GpuNumMonitorDevices:%lu\n", (unsigned long)(gpu_num_monitor_devices));}
|
||||
return gpu_num_monitor_devices;
|
||||
}
|
||||
|
||||
char* goamdsmi_gpu_dev_name_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t len = 256;
|
||||
char* dev_name = (char*)malloc(sizeof(char)*len);dev_name[0] = '\0';
|
||||
strcpy(dev_name, GOAMDSMI_STRING_NA);
|
||||
|
||||
return dev_name;
|
||||
}
|
||||
|
||||
uint16_t goamdsmi_gpu_dev_id_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint16_t gpu_dev_id_temp = GOAMDSMI_UINT16_MAX;
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_id(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &gpu_dev_id_temp))) readSuccess = true;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuDevId:%d\n", readSuccess?"Success":"Failed", dv_ind, gpu_dev_id_temp);}
|
||||
|
||||
return gpu_dev_id_temp;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_pci_id_get(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_pci_id = GOAMDSMI_UINT64_MAX;
|
||||
return gpu_pci_id;
|
||||
}
|
||||
|
||||
char* goamdsmi_gpu_dev_vendor_name_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t len = 256;
|
||||
char* gpu_vendor_name = (char*)malloc(sizeof(char)*len);gpu_vendor_name[0] = '\0';
|
||||
strcpy(gpu_vendor_name, GOAMDSMI_STRING_NA);
|
||||
|
||||
return gpu_vendor_name;
|
||||
}
|
||||
|
||||
char* goamdsmi_gpu_dev_vbios_version_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t len = 256;
|
||||
char* vbios_version = (char*)malloc(sizeof(char)*len);vbios_version[0] = '\0';
|
||||
strcpy(vbios_version, GOAMDSMI_STRING_NA);
|
||||
|
||||
return vbios_version;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_power_cap_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_power_cap = GOAMDSMI_UINT64_MAX;
|
||||
amdsmi_power_cap_info_t amdsmi_power_cap_info_temp = {0};
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_cap_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], GPU_SENSOR_0, &amdsmi_power_cap_info_temp)))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_power_cap = amdsmi_power_cap_info_temp.power_cap;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuPowerCap:%llu, GpuPowerCapInWatt:%.6f\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_power_cap), ((double)(gpu_power_cap))/1000000);}
|
||||
return gpu_power_cap;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_power_get(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_power = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_power_temp = GOAMDSMI_UINT64_MAX;
|
||||
amdsmi_power_info_t amdsmi_power_info_temp = {0};
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &amdsmi_power_info_temp)))
|
||||
{
|
||||
gpu_power_temp = amdsmi_power_info_temp.average_socket_power;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerAverage:%llu, GpuPowerAverageinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power_temp), ((double)(gpu_power_temp))/1000000);}
|
||||
|
||||
if(MAX_GPU_POWER_FROM_DRIVER == gpu_power_temp)
|
||||
{
|
||||
gpu_power_temp = amdsmi_power_info_temp.current_socket_power;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerCurrent:%llu, GpuPowerCurrentinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power_temp), ((double)(gpu_power_temp))/1000000);}
|
||||
}
|
||||
gpu_power = gpu_power_temp;
|
||||
gpu_power = (gpu_power)*1000000;//to maintain backward compatibity with old ROCM SMI
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success for Gpu:%d, GpuPower:%llu, GpuPowerinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power), ((double)(gpu_power))/1000000);}
|
||||
return gpu_power;
|
||||
}
|
||||
|
||||
amdsmi_gpu_metrics_t metrics = {0};
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_metrics_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &metrics)))
|
||||
{
|
||||
gpu_power_temp = metrics.average_socket_power;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerAverageFromMetrics:%llu, GpuPowerAverageFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)gpu_power_temp, ((double)(gpu_power_temp))/1000000);}
|
||||
|
||||
if(MAX_GPU_POWER_FROM_DRIVER == gpu_power_temp)
|
||||
{
|
||||
gpu_power_temp = metrics.current_socket_power;
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerCurrentFromMetrics:%llu, GpuPowerCurrentFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)gpu_power_temp, ((double)(gpu_power_temp))/1000000);}
|
||||
}
|
||||
gpu_power = gpu_power_temp;
|
||||
gpu_power = (gpu_power)*1000000;//to maintain backward compatibity with old ROCM SMI
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerFromMetrics:%llu, GpuPowerFromMetricsinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power), ((double)(gpu_power))/1000000);}
|
||||
return gpu_power;
|
||||
}
|
||||
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, Failed for Gpu:%d, GpuPower:%llu, GpuPowerinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power), ((double)(gpu_power))/1000000);}
|
||||
return gpu_power;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor, uint32_t metric)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_temperature = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_temperature_temp = GOAMDSMI_UINT64_MAX;
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_temp_metric(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], sensor, metric, &gpu_temperature_temp)))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_temperature = gpu_temperature_temp;
|
||||
gpu_temperature = (gpu_temperature)*1000;//to maintain backward compatibity with old ROCM SMI
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d Sensor:%d Metric:%d, GpuTemperature:%llu, GpuTemperatureInDegree:%.3f\n", readSuccess?"Success":"Failed", dv_ind, sensor, metric, (unsigned long long)(gpu_temperature), ((double)(gpu_temperature))/1000);}
|
||||
}
|
||||
return gpu_temperature;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_gpu_dev_overdrive_level_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t gpu_overdrive_level = GOAMDSMI_UINT32_MAX;
|
||||
return gpu_overdrive_level;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_gpu_dev_mem_overdrive_level_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t gpu_mem_overdrive_level = GOAMDSMI_UINT32_MAX;
|
||||
return gpu_mem_overdrive_level;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_gpu_dev_perf_level_get(uint32_t dv_ind)
|
||||
{
|
||||
uint32_t gpu_perf = GOAMDSMI_UINT32_MAX;
|
||||
return gpu_perf;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_sclk_freq = GOAMDSMI_UINT64_MAX;
|
||||
amdsmi_frequencies_t freq = {0};
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_clk_freq(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_CLK_TYPE_SYS, &freq)))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_sclk_freq = freq.frequency[freq.current];
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuSclkFreq:%llu, GpuSclkFreqMhz:%.6f\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_sclk_freq), ((double)(gpu_sclk_freq))/1000000);}
|
||||
|
||||
return gpu_sclk_freq;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_memclk_freq = GOAMDSMI_UINT64_MAX;
|
||||
amdsmi_frequencies_t freq = {0};
|
||||
|
||||
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_clk_freq(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_CLK_TYPE_MEM, &freq)))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_memclk_freq = freq.frequency[freq.current];
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMclkFreq:%llu, GpuMclkFreqMhz:%.6f\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memclk_freq), ((double)(gpu_memclk_freq))/1000000);}
|
||||
|
||||
return gpu_memclk_freq;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_sclk(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_min_sclk = GOAMDSMI_UINT64_MAX;
|
||||
return gpu_min_sclk;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_mclk(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_min_memclk = GOAMDSMI_UINT64_MAX;
|
||||
return gpu_min_memclk;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_sclk(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_max_sclk = GOAMDSMI_UINT64_MAX;
|
||||
return gpu_max_sclk;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_mclk(uint32_t dv_ind)
|
||||
{
|
||||
uint64_t gpu_max_memclk = GOAMDSMI_UINT64_MAX;
|
||||
return gpu_max_memclk;
|
||||
}
|
||||
|
||||
uint32_t goamdsmi_gpu_dev_gpu_busy_percent_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint32_t gpu_busy_percent = GOAMDSMI_UINT32_MAX;
|
||||
amdsmi_engine_usage_t amdsmi_engine_usage_temp;
|
||||
|
||||
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_activity(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &amdsmi_engine_usage_temp))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_busy_percent = amdsmi_engine_usage_temp.gfx_activity;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuBusyPerc:%lu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long)(gpu_busy_percent));}
|
||||
|
||||
return gpu_busy_percent;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_busy_percent_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_memory_busy_percent = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_memory_usage_temp = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_memory_total_temp = GOAMDSMI_UINT64_MAX;
|
||||
|
||||
if( (AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_usage(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_usage_temp))&&
|
||||
(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_total(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_total_temp)))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_memory_busy_percent = (uint64_t)(gpu_memory_usage_temp*100)/gpu_memory_total_temp;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryBusyPerc:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_busy_percent));}
|
||||
|
||||
return gpu_memory_busy_percent;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_usage_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_memory_usage = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_memory_usage_temp = GOAMDSMI_UINT64_MAX;
|
||||
|
||||
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_usage(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_usage_temp))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_memory_usage = (uint64_t)gpu_memory_usage_temp;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryUsage:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_usage));}
|
||||
|
||||
return gpu_memory_usage;
|
||||
}
|
||||
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_total_get(uint32_t dv_ind)
|
||||
{
|
||||
bool readSuccess = false;
|
||||
uint64_t gpu_memory_total = GOAMDSMI_UINT64_MAX;
|
||||
uint64_t gpu_memory_total_temp = GOAMDSMI_UINT64_MAX;
|
||||
|
||||
if(AMDSMI_STATUS_SUCCESS == amdsmi_get_gpu_memory_total(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], AMDSMI_MEM_TYPE_VRAM, &gpu_memory_total_temp))
|
||||
{
|
||||
readSuccess = true;
|
||||
gpu_memory_total = (uint64_t)gpu_memory_total_temp;
|
||||
}
|
||||
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_1)) {printf("AMDSMI, %s for Gpu:%d, GpuMemoryTotal:%llu\n", readSuccess?"Success":"Failed", dv_ind, (unsigned long long)(gpu_memory_total));}
|
||||
|
||||
return gpu_memory_total;
|
||||
}
|
||||
@@ -0,0 +1,559 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "goamdsmi.h"
|
||||
////////////////////////////////////////////////------------CPU------------////////////////////////////////////////////////
|
||||
/**
|
||||
* @brief Go language stub to initialize the AMDSMI library
|
||||
*
|
||||
* @retval ::bool value of true upon success
|
||||
* @retval false is returned upon failure.
|
||||
*
|
||||
*/
|
||||
bool goamdsmi_cpu_init();
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the core energy for a given core
|
||||
*
|
||||
* @details Given a core index @p num, this function will call the
|
||||
* esmi_core_energy_get() function to update the @p penergy in micro Joules.
|
||||
* This value is then passed as a uint64_t val to the Go routine that called it.
|
||||
*
|
||||
* @param[in] num is the core index
|
||||
*
|
||||
* @retval ::uint64_t value of the penergy in micro Joules.
|
||||
* @retval zero is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_cpu_core_energy_get(uint32_t num);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the socket energy for a given socket
|
||||
*
|
||||
* @details Given a socket index @p socket_idx, this function will call the
|
||||
* esmi_socket_energy_get() function to get the socket energy counter of an
|
||||
* online cpu in that socket. This value is then passed as a uint64_t val to
|
||||
* the Go routine that called it.
|
||||
*
|
||||
* @param[in] socket_idx is the socket index
|
||||
*
|
||||
* @retval ::uint64_t value of the socket energy counter
|
||||
* @retval zero is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_cpu_socket_energy_get(uint32_t socket_idx);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get normalized status of
|
||||
* the processor's PROCHOT status.
|
||||
* 1 - PROCHOT active, 0 - PROCHOT inactive
|
||||
*
|
||||
* @details Given a socket index @p socket_idx and this function will get
|
||||
* PROCHOT at @p prochot.
|
||||
*
|
||||
* @param[in] socket_idx a socket index
|
||||
*
|
||||
* @retval ::uint32_t value of the prochot status
|
||||
* @retval -1 is returned upon failure or if status is inactive.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_prochot_status_get(uint32_t socket_idx);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the instantaneous power
|
||||
* consumption of the provided socket.
|
||||
*
|
||||
* @details Given a socket index @p sock_ind this function will
|
||||
* get the current power consumption (in milliwatts).
|
||||
*
|
||||
* @param[in] sock_ind a socket index
|
||||
*
|
||||
* @retval ::uint32_t value of the socket power
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_socket_power_get(uint32_t sock_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the current power cap value
|
||||
* for a given socket.
|
||||
*
|
||||
* @details This function will return the valid power cap @p pcap for a given
|
||||
* socket @p sock_ind, this value will be used by the system to limit
|
||||
* the power usage (in milliwatts).
|
||||
*
|
||||
* @param[in] sock_ind a socket index
|
||||
*
|
||||
* @retval ::uint32_t value of the socket power cap
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_socket_power_cap_get(uint32_t sock_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the boostlimit value for a given core
|
||||
*
|
||||
* @details This function will return the core's current boost limit
|
||||
* @p boostlimit for a particular @p socket
|
||||
*
|
||||
* @param[in] socket a socket index
|
||||
*
|
||||
* @retval ::uint32_t value of the boostlimit
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_core_boostlimit_get(uint32_t socket);
|
||||
|
||||
/**
|
||||
* @brief Go stub to get the number of threads per core in the system
|
||||
*
|
||||
* @retval ::Number of threads per core
|
||||
* @retval Zero is returned upon failure.
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_threads_per_core_get();
|
||||
|
||||
/**
|
||||
* @brief Go stub to get the number of threads available in the system
|
||||
*
|
||||
* @retval ::Number of threads
|
||||
* @retval Zero is returned upon failure.
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_number_of_threads_get();
|
||||
|
||||
/**
|
||||
* @brief Go stub to get the number of threads per core
|
||||
* available in the system
|
||||
*
|
||||
* @retval ::Number of threads per core
|
||||
* @retval Zero is returned upon failure.
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_threads_per_core_get();
|
||||
|
||||
/**
|
||||
* @brief Go stub to get the number of threads available in the system
|
||||
*
|
||||
* @retval ::Number of threads
|
||||
* @retval Zero is returned upon failure.
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_number_of_threads_get();
|
||||
|
||||
/**
|
||||
* @brief Go stub to get the total number of processor sockets
|
||||
* available in the system
|
||||
*
|
||||
* @retval ::uint32_t value of the socket number
|
||||
* @retval Zero is returned upon failure.
|
||||
*/
|
||||
uint32_t goamdsmi_cpu_number_of_sockets_get();
|
||||
|
||||
////////////////////////////////////////////////------------GPU------------////////////////////////////////////////////////
|
||||
/**
|
||||
* @brief Go language stub to initialize the ROCm-SMI library
|
||||
*
|
||||
* @retval ::bool value of true upon success
|
||||
* @retval false is returned upon failure.
|
||||
*
|
||||
*/
|
||||
bool goamdsmi_gpu_init();
|
||||
|
||||
/**
|
||||
* @brief Go language stub to shut down the ROCm-SMI library
|
||||
* and do necessary clean up
|
||||
*
|
||||
* @retval ::bool value of true upon success
|
||||
* @retval false is returned upon failure.
|
||||
*
|
||||
*/
|
||||
bool goamdsmi_gpu_shutdown();
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the number of GPU devices
|
||||
*
|
||||
* @details This function will call the rsmi_num_monitor_devices()
|
||||
* function to return the number of GPU devices to be monitored.
|
||||
* This value is then passed as a uint val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @retval ::uint32_t value of num GPUs
|
||||
* @retval zero is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_num_monitor_devices();
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the gpu device name string
|
||||
*
|
||||
* @details This function will call the rsmi_dev_name_get()
|
||||
* function to write the gpu device name string (up to len characters)
|
||||
* for device dv_ind and return a char pointer. This value is then
|
||||
* passed as char * to the Go routine that called it. The caller of this
|
||||
* function must free the allocated buffer for the device name.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::char* GPU device name
|
||||
* @retval NA is returned upon failure.
|
||||
*
|
||||
*/
|
||||
char* goamdsmi_gpu_dev_name_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU device id
|
||||
*
|
||||
* @details This function will call the rsmi_dev_id_get()
|
||||
* function to return the GPU device id. This value is then
|
||||
* passed as a uint16_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint16_t GPU device id
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint16_t goamdsmi_gpu_dev_id_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU unique pci id
|
||||
*
|
||||
* @details This function will call the rsmi_dev_pci_id_get()
|
||||
* function to return the unique PCI device identifier
|
||||
* associated for a device. This value is then passed as
|
||||
* a uint64_t val to the Go routine that called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t value of pci id
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_pci_id_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the VBIOS identifier string
|
||||
*
|
||||
* @details This function will call the rsmi_dev_vbios_ver_get()
|
||||
* function to write the VBIOS char array (up to len characters)
|
||||
* for device dv_ind and return a char pointer. This value is then
|
||||
* passed as char pointer to the Go routine that called it. The caller
|
||||
* of this function must free the allocated buffer for the vbios
|
||||
* identifier
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
* @param[in] ::char* vbios buffer of length
|
||||
*
|
||||
* @retval ::char* VBIOS identifier
|
||||
* @retval NA is returned upon failure
|
||||
*
|
||||
*/
|
||||
char* goamdsmi_gpu_dev_vbios_version_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the vendor
|
||||
*
|
||||
* @details This function will call the rsmi_dev_vendor_name_get()
|
||||
* function to write the name of the vendor char array (up to len
|
||||
* characters) for a device dv_ind and return a char pointer. This
|
||||
* value is then passed as a char pointer to the Go routine that
|
||||
* called it. The caller of this function must free the allocated
|
||||
* buffer for the vendor name
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::char* vendor name
|
||||
* @retval NA is returned upon failure.
|
||||
*
|
||||
*/
|
||||
char* goamdsmi_gpu_dev_vendor_name_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU power cap
|
||||
*
|
||||
* @details This function will call the rsmi_dev_power_cap_get()
|
||||
* function to return the gpu power cap. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t GPU power cap
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_power_cap_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU power
|
||||
*
|
||||
* @details This function will call the rsmi_dev_power_get()
|
||||
* function to return the gpu power. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t GPU power
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_power_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU current temperature
|
||||
*
|
||||
* @details This function will call the rsmi_dev_temp_metric_get()
|
||||
* function to return the gpu current temperature. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, uint32_t sensor, uint32_t metric
|
||||
*
|
||||
* @retval ::uint64_t GPU current temperature
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor, uint32_t metric);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the overdrive level of the device
|
||||
*
|
||||
* @details This function will call the rsmi_dev_overdrive_level_get()
|
||||
* function to return the overdrive percentage. This value is then
|
||||
* passed as a uint32_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint32_t overdrive level
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_dev_overdrive_level_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the memory overdrive level of the device
|
||||
*
|
||||
* @details This function will call the rsmi_dev_mem_overdrive_level_get()
|
||||
* function to return the memory overdrive percentage. This value is then
|
||||
* passed as a uint32_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint32_t memory overdrive level
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_dev_mem_overdrive_level_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the performance level of the device
|
||||
*
|
||||
* @details This function will call the rsmi_dev_perf_level_get()
|
||||
* function to return the rsmi_dev_perf_level_t. This value is then
|
||||
* passed as a uint32_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint32_t performance level (rsmi_dev_perf_level_t)
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_dev_perf_level_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU SCLK limit
|
||||
*
|
||||
* @details This function will call the rsmi_dev_gpu_clk_freq_get()
|
||||
* function to return the gpu SCLK Limit. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint64_t GPU SCLK Limit
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_sclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU MCLK limit
|
||||
*
|
||||
* @details This function will call the rsmi_dev_gpu_clk_freq_get()
|
||||
* function to return the gpu MCLK Limit. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint64_t GPU MCLK Limit
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_gpu_clk_freq_get_mclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the minimum supported SCLK frequency
|
||||
*
|
||||
* @details This function will call the rsmi_od_volt_freq_data_get()
|
||||
* function to return the minimum supported SCLK frequency.
|
||||
* This value is then passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t minimum supported sclk frequency
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_sclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the minimum supported MCLK frequency
|
||||
*
|
||||
* @details This function will call the rsmi_od_volt_freq_data_get()
|
||||
* function to return the minimum supported MCLK frequency.
|
||||
* This value is then passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t minimum supported mclk frequency
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_min_get_mclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the maximum supported SCLK frequency
|
||||
*
|
||||
* @details This function will call the rsmi_od_volt_freq_data_get()
|
||||
* function to return the maximum supported SCLK frequency.
|
||||
* This value is then passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t maximum supported sclk frequency
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_sclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the maximum supported MCLK frequency
|
||||
*
|
||||
* @details This function will call the rsmi_od_volt_freq_data_get()
|
||||
* function to return the maximum supported MCLK frequency.
|
||||
* This value is then passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index
|
||||
*
|
||||
* @retval ::uint64_t maximum supported mclk frequency
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_od_volt_freq_range_max_get_mclk(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU Activity
|
||||
*
|
||||
* @details This function will call the rsmi_dev_gpu_activity_get()
|
||||
* function to return the current GPU use. This value is then
|
||||
* passed as a uint32_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint32_t GPU Activity use
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_dev_gpu_busy_percent_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU Activity
*
* @details Repeated declaration of the GPU activity query. The current
* GPU use is passed as a uint32_t val to the Go routine that
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint32_t GPU Activity use
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint32_t goamdsmi_gpu_dev_gpu_busy_percent_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU Memory Use percent
|
||||
*
|
||||
* @details This function will call the rsmi_dev_memory_busy_percent_get()
|
||||
* function to return the current device memory use percent. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint64_t GPU memory use percent
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_busy_percent_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the GPU Memory Usage
|
||||
*
|
||||
* @details This function will call the rsmi_dev_memory_usage_get()
|
||||
* function to return the amount of memory currently being used. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint64_t GPU memory usage
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_usage_get(uint32_t dv_ind);
|
||||
|
||||
/**
|
||||
* @brief Go language stub to get the Total amount of GPU Memory
|
||||
*
|
||||
* @details This function will call the rsmi_dev_memory_total_get()
|
||||
* function to return the total amount of memory. This value is then
|
||||
* passed as a uint64_t val to the Go routine that
|
||||
* called it.
|
||||
*
|
||||
* @param[in] ::uint32_t device index, flag, ptr to rsmi_frequencies_t
|
||||
*
|
||||
* @retval ::uint64_t Total GPU memory
|
||||
* @retval -1 is returned upon failure.
|
||||
*
|
||||
*/
|
||||
uint64_t goamdsmi_gpu_dev_gpu_memory_total_get(uint32_t dv_ind);
|
||||
@@ -0,0 +1,63 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GO_AMD_SMI_H_
|
||||
#define GO_AMD_SMI_H_
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Constant values shared by the Go wrapper functions. */
#define GOAMDSMI_VALUE_0 0
/* All-ones patterns, one per integer width. */
#define GOAMDSMI_UINT16_MAX 0xFFFF
#define GOAMDSMI_UINT32_MAX 0xFFFFFFFF
#define GOAMDSMI_UINT64_MAX 0xFFFFFFFFFFFFFFFF
/* Placeholder text for string values. */
#define GOAMDSMI_STRING_NA "NA"
|
||||
|
||||
/**
 * @brief Compile-time check that decides whether a debug print is enabled.
 * -DENABLE_DEBUG_LEVEL=1 (or) -DENABLE_DEBUG_LEVEL=<Enable_Debug_level_number> may be passed at cmake time;
 * when it is not passed, ENABLE_DEBUG_LEVEL falls back to 0.
 *
 * @param[in] debug_level level required by the print being guarded
 *
 * @retval true is returned when ENABLE_DEBUG_LEVEL is at least @p debug_level
 * @retval false is returned otherwise, i.e. the print stays disabled.
 *
 */
#ifndef ENABLE_DEBUG_LEVEL
#define ENABLE_DEBUG_LEVEL 0
#endif

/* Both operands are parenthesized so that expression arguments keep their meaning. */
#define enable_debug_level(debug_level) (((ENABLE_DEBUG_LEVEL) >= (debug_level)) ? true : false)
|
||||
|
||||
/** Result codes of the goamdsmi wrapper layer. */
typedef enum {
    /** Operation successful */
    GOAMDSMI_STATUS_SUCCESS = 0,
    /** Operation failed */
    GOAMDSMI_STATUS_FAILURE = 1,
} goamdsmi_status_t;
|
||||
|
||||
/** Selects which side (CPU or GPU) an init request refers to. */
typedef enum {
    /** CPU Init */
    GOAMDSMI_CPU_INIT = 0,
    /** GPU Init */
    GOAMDSMI_GPU_INIT = 1,
} goamdsmi_Init_t;
|
||||
|
||||
/** Debug levels that can be compared against ENABLE_DEBUG_LEVEL. */
typedef enum {
    /** Debug Level as 0 */
    GOAMDSMI_DEBUG_LEVEL_0 = 0,
    /** Debug Level as 1 */
    GOAMDSMI_DEBUG_LEVEL_1 = 1,
    /** Debug Level as 2 */
    GOAMDSMI_DEBUG_LEVEL_2 = 2,
    /** Debug Level as 3 */
    GOAMDSMI_DEBUG_LEVEL_3 = 3,
} goamdsmi_Enable_Debug_Level_t;
|
||||
|
||||
#endif
|
||||
File diff ditekan karena terlalu besar
Load Diff
@@ -0,0 +1,476 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
|
||||
#ifndef _UAPI_ASM_X86_AMD_HSMP_H_
|
||||
#define _UAPI_ASM_X86_AMD_HSMP_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#pragma pack(4)
|
||||
|
||||
/* Number of 32-bit words in the argument/response buffer of an HSMP message */
#define HSMP_MAX_MSG_LEN 8
|
||||
|
||||
/*
 * HSMP Messages supported.
 * Every ID is spelled out; HSMP_MSG_ID_MAX is one past the last message ID.
 */
enum hsmp_message_ids {
	HSMP_TEST                       = 0x01,	/* Increments input value by 1 */
	HSMP_GET_SMU_VER                = 0x02,	/* SMU FW version */
	HSMP_GET_PROTO_VER              = 0x03,	/* HSMP interface version */
	HSMP_GET_SOCKET_POWER           = 0x04,	/* average package power consumption */
	HSMP_SET_SOCKET_POWER_LIMIT     = 0x05,	/* Set the socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT     = 0x06,	/* Get current socket power limit */
	HSMP_GET_SOCKET_POWER_LIMIT_MAX = 0x07,	/* Get maximum socket power value */
	HSMP_SET_BOOST_LIMIT            = 0x08,	/* Set a core maximum frequency limit */
	HSMP_SET_BOOST_LIMIT_SOCKET     = 0x09,	/* Set socket maximum frequency level */
	HSMP_GET_BOOST_LIMIT            = 0x0A,	/* Get current frequency limit */
	HSMP_GET_PROC_HOT               = 0x0B,	/* Get PROCHOT status */
	HSMP_SET_XGMI_LINK_WIDTH        = 0x0C,	/* Set max and min width of xGMI Link */
	HSMP_SET_DF_PSTATE              = 0x0D,	/* Alter APEnable/Disable messages behavior */
	HSMP_SET_AUTO_DF_PSTATE         = 0x0E,	/* Enable DF P-State Performance Boost algorithm */
	HSMP_GET_FCLK_MCLK              = 0x0F,	/* Get FCLK and MEMCLK for current socket */
	HSMP_GET_CCLK_THROTTLE_LIMIT    = 0x10,	/* Get CCLK frequency limit in socket */
	HSMP_GET_C0_PERCENT             = 0x11,	/* Get average C0 residency in socket */
	HSMP_SET_NBIO_DPM_LEVEL         = 0x12,	/* Set max/min LCLK DPM Level for a given NBIO */
	HSMP_GET_NBIO_DPM_LEVEL         = 0x13,	/* Get LCLK DPM level min and max for a given NBIO */
	HSMP_GET_DDR_BANDWIDTH          = 0x14,	/* Get theoretical maximum and current DDR Bandwidth */
	HSMP_GET_TEMP_MONITOR           = 0x15,	/* Get socket temperature */
	HSMP_GET_DIMM_TEMP_RANGE        = 0x16,	/* Get per-DIMM temperature range and refresh rate */
	HSMP_GET_DIMM_POWER             = 0x17,	/* Get per-DIMM power consumption */
	HSMP_GET_DIMM_THERMAL           = 0x18,	/* Get per-DIMM thermal sensors */
	HSMP_GET_SOCKET_FREQ_LIMIT      = 0x19,	/* Get current active frequency per socket */
	HSMP_GET_CCLK_CORE_LIMIT        = 0x1A,	/* Get CCLK frequency limit per core */
	HSMP_GET_RAILS_SVI              = 0x1B,	/* Get SVI-based Telemetry for all rails */
	HSMP_GET_SOCKET_FMAX_FMIN       = 0x1C,	/* Get Fmax and Fmin per socket */
	HSMP_GET_IOLINK_BANDWITH        = 0x1D,	/* Get current bandwidth on IO Link */
	HSMP_GET_XGMI_BANDWITH          = 0x1E,	/* Get current bandwidth on xGMI Link */
	HSMP_SET_GMI3_WIDTH             = 0x1F,	/* Set max and min GMI3 Link width */
	HSMP_SET_PCI_RATE               = 0x20,	/* Control link rate on PCIe devices */
	HSMP_SET_POWER_MODE             = 0x21,	/* Select power efficiency profile policy */
	HSMP_SET_PSTATE_MAX_MIN         = 0x22,	/* Set the max and min DF P-State */
	HSMP_GET_METRIC_TABLE_VER       = 0x23,	/* Get metrics table version */
	HSMP_GET_METRIC_TABLE           = 0x24,	/* Get metrics table */
	HSMP_GET_METRIC_TABLE_DRAM_ADDR = 0x25,	/* Get metrics table dram address */
	HSMP_SET_XGMI_PSTATE_RANGE      = 0x26,	/* Set xGMI P-state range */
	HSMP_CPU_RAIL_ISO_FREQ_POLICY   = 0x27,	/* Get/Set Cpu Iso frequency policy */
	HSMP_DFC_ENABLE_CTRL            = 0x28,	/* Enable/Disable DF C-state */
	/* IDs 29h-2Fh are not assigned in this list */
	HSMP_GET_RAPL_UNITS             = 0x30,	/* Get scaling factor for energy */
	HSMP_GET_RAPL_CORE_COUNTER      = 0x31,	/* Get core energy counter value */
	HSMP_GET_RAPL_PACKAGE_COUNTER   = 0x32,	/* Get package energy counter value */
	HSMP_MSG_ID_MAX                 = 0x33,
};
|
||||
|
||||
struct hsmp_message {
|
||||
__u32 msg_id; /* Message ID */
|
||||
__u16 num_args; /* Number of input argument words in message */
|
||||
__u16 response_sz; /* Number of expected output/response words */
|
||||
__u32 args[HSMP_MAX_MSG_LEN];/* argument/response buffer */
|
||||
__u16 sock_ind; /* socket number */
|
||||
};
|
||||
|
||||
enum hsmp_msg_type {
|
||||
HSMP_RSVD = -1,
|
||||
HSMP_SET = 0,
|
||||
HSMP_GET = 1,
|
||||
HSMP_SET_GET = 2,
|
||||
};
|
||||
|
||||
enum hsmp_proto_versions {
|
||||
HSMP_PROTO_VER2 = 2,
|
||||
HSMP_PROTO_VER3,
|
||||
HSMP_PROTO_VER4,
|
||||
HSMP_PROTO_VER5,
|
||||
HSMP_PROTO_VER6,
|
||||
HSMP_PROTO_VER7
|
||||
};
|
||||
|
||||
struct hsmp_msg_desc {
|
||||
int num_args;
|
||||
int response_sz;
|
||||
enum hsmp_msg_type type;
|
||||
};
|
||||
|
||||
/*
|
||||
* User may use these comments as reference, please find the
|
||||
* supported list of messages and message definition in the
|
||||
* HSMP chapter of respective family/model PPR.
|
||||
*
|
||||
* Not supported messages would return -ENOMSG.
|
||||
*/
|
||||
static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
|
||||
/* RESERVED */
|
||||
{0, 0, HSMP_RSVD},
|
||||
|
||||
/*
|
||||
* HSMP_TEST, num_args = 1, response_sz = 1
|
||||
* input: args[0] = xx
|
||||
* output: args[0] = xx + 1
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SMU_VER, num_args = 0, response_sz = 1
|
||||
* output: args[0] = smu fw ver
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_PROTO_VER, num_args = 0, response_sz = 1
|
||||
* output: args[0] = proto version
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SOCKET_POWER, num_args = 0, response_sz = 1
|
||||
* output: args[0] = socket power in mWatts
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_SOCKET_POWER_LIMIT, num_args = 1, response_sz = 0
|
||||
* input: args[0] = power limit value in mWatts
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SOCKET_POWER_LIMIT, num_args = 0, response_sz = 1
|
||||
* output: args[0] = socket power limit value in mWatts
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SOCKET_POWER_LIMIT_MAX, num_args = 0, response_sz = 1
|
||||
* output: args[0] = maximum socket power limit in mWatts
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_BOOST_LIMIT, num_args = 1, response_sz = 0
|
||||
* input: args[0] = apic id[31:16] + boost limit value in MHz[15:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_BOOST_LIMIT_SOCKET, num_args = 1, response_sz = 0
|
||||
* input: args[0] = boost limit value in MHz
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_BOOST_LIMIT, num_args = 1, response_sz = 1
|
||||
* input: args[0] = apic id
|
||||
* output: args[0] = boost limit value in MHz
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_PROC_HOT, num_args = 0, response_sz = 1
|
||||
* output: args[0] = proc hot status
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_XGMI_LINK_WIDTH, num_args = 1, response_sz = 0
|
||||
* input: args[0] = min link width[15:8] + max link width[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_DF_PSTATE, num_args = 1, response_sz = 0
|
||||
* input: args[0] = df pstate[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/* HSMP_SET_AUTO_DF_PSTATE, num_args = 0, response_sz = 0 */
|
||||
{0, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_FCLK_MCLK, num_args = 0, response_sz = 2
|
||||
* output: args[0] = fclk in MHz, args[1] = mclk in MHz
|
||||
*/
|
||||
{0, 2, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_CCLK_THROTTLE_LIMIT, num_args = 0, response_sz = 1
|
||||
* output: args[0] = core clock in MHz
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_C0_PERCENT, num_args = 0, response_sz = 1
|
||||
* output: args[0] = average c0 residency
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 0
|
||||
* input: args[0] = nbioid[23:16] + max dpm level[15:8] + min dpm level[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_NBIO_DPM_LEVEL, num_args = 1, response_sz = 1
|
||||
* input: args[0] = nbioid[23:16]
|
||||
* output: args[0] = max dpm level[15:8] + min dpm level[7:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_DDR_BANDWIDTH, num_args = 0, response_sz = 1
|
||||
* output: args[0] = max bw in Gbps[31:20] + utilised bw in Gbps[19:8] +
|
||||
* bw in percentage[7:0]
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_TEMP_MONITOR, num_args = 0, response_sz = 1
|
||||
* output: args[0] = temperature in degree celsius. [15:8] integer part +
|
||||
* [7:5] fractional part
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_DIMM_TEMP_RANGE, num_args = 1, response_sz = 1
|
||||
* input: args[0] = DIMM address[7:0]
|
||||
* output: args[0] = refresh rate[3] + temperature range[2:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_DIMM_POWER, num_args = 1, response_sz = 1
|
||||
* input: args[0] = DIMM address[7:0]
|
||||
* output: args[0] = DIMM power in mW[31:17] + update rate in ms[16:8] +
|
||||
* DIMM address[7:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_DIMM_THERMAL, num_args = 1, response_sz = 1
|
||||
* input: args[0] = DIMM address[7:0]
|
||||
* output: args[0] = temperature in degree celsius[31:21] + update rate in ms[16:8] +
|
||||
* DIMM address[7:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SOCKET_FREQ_LIMIT, num_args = 0, response_sz = 1
|
||||
* output: args[0] = frequency in MHz[31:16] + frequency source[15:0]
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_CCLK_CORE_LIMIT, num_args = 1, response_sz = 1
|
||||
* input: args[0] = apic id of the core[31:0]
|
||||
* output: args[0] = frequency in MHz[31:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_RAILS_SVI, num_args = 0, response_sz = 1
|
||||
* output: args[0] = power in mW[31:0]
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_SOCKET_FMAX_FMIN, num_args = 0, response_sz = 1
|
||||
* output: args[0] = fmax in MHz[31:16] + fmin in MHz[15:0]
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_IOLINK_BANDWITH, num_args = 1, response_sz = 1
|
||||
* input: args[0] = link id[15:8] + bw type[2:0]
|
||||
* output: args[0] = io bandwidth in Mbps[31:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_XGMI_BANDWITH, num_args = 1, response_sz = 1
|
||||
* input: args[0] = link id[15:8] + bw type[2:0]
|
||||
* output: args[0] = xgmi bandwidth in Mbps[31:0]
|
||||
*/
|
||||
{1, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_GMI3_WIDTH, num_args = 1, response_sz = 0
|
||||
* input: args[0] = min link width[15:8] + max link width[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_PCI_RATE, num_args = 1, response_sz = 1
|
||||
* input: args[0] = link rate control value
|
||||
* output: args[0] = previous link rate control value
|
||||
*/
|
||||
{1, 1, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0/1
|
||||
* input: args[0] = set/get power mode[31] + power efficiency mode[2:0]
|
||||
* output: args[0] = current power efficiency mode[2:0]
|
||||
*/
|
||||
{1, 1, HSMP_SET_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
|
||||
* input: args[0] = min df pstate[15:8] + max df pstate[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_METRIC_TABLE_VER, num_args = 0, response_sz = 1
|
||||
* output: args[0] = metrics table version
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_METRIC_TABLE, num_args = 0, response_sz = 0
|
||||
*/
|
||||
{0, 0, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_METRIC_TABLE_DRAM_ADDR, num_args = 0, response_sz = 2
|
||||
* output: args[0] = lower 32 bits of the address
|
||||
* output: args[1] = upper 32 bits of the address
|
||||
*/
|
||||
{0, 2, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0
|
||||
* input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0]
|
||||
*/
|
||||
{1, 0, HSMP_SET},
|
||||
|
||||
/*
|
||||
* HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1
|
||||
* input: args[0] = set/get policy[31] +
|
||||
* disable/enable independent control[0]
|
||||
* output: args[0] = current policy[0]
|
||||
*/
|
||||
{1, 1, HSMP_SET_GET},
|
||||
|
||||
/*
|
||||
* HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1
|
||||
* input: args[0] = set/get policy[31] + enable/disable DFC[0]
|
||||
* output: args[0] = current policy[0]
|
||||
*/
|
||||
{1, 1, HSMP_SET_GET},
|
||||
|
||||
/* RESERVED(0x29-0x2f) */
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
{0, 0, HSMP_RSVD},
|
||||
|
||||
/*
|
||||
* HSMP_GET_RAPL_UNITS, num_args = 0, response_sz = 1
|
||||
* output: args[0] = tu value[19:16] + esu value[12:8]
|
||||
*/
|
||||
{0, 1, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 2
|
||||
* input: args[0] = Apic id[15:0]
|
||||
* output: args[0] = lower 32 bits of energy
|
||||
* output: args[1] = upper 32 bits of energy
|
||||
*/
|
||||
{1, 2, HSMP_GET},
|
||||
|
||||
/*
|
||||
* HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 2
|
||||
* output: args[0] = lower 32 bits of energy
|
||||
* output: args[1] = upper 32 bits of energy
|
||||
*/
|
||||
{0, 2, HSMP_GET},
|
||||
};
|
||||
|
||||
/* Metrics table (supported only with proto version 6) */
|
||||
struct hsmp_metric_table {
|
||||
__u32 accumulation_counter;
|
||||
|
||||
/* TEMPERATURE */
|
||||
__u32 max_socket_temperature;
|
||||
__u32 max_vr_temperature;
|
||||
__u32 max_hbm_temperature;
|
||||
__u64 max_socket_temperature_acc;
|
||||
__u64 max_vr_temperature_acc;
|
||||
__u64 max_hbm_temperature_acc;
|
||||
|
||||
/* POWER */
|
||||
__u32 socket_power_limit;
|
||||
__u32 max_socket_power_limit;
|
||||
__u32 socket_power;
|
||||
|
||||
/* ENERGY */
|
||||
__u64 timestamp;
|
||||
__u64 socket_energy_acc;
|
||||
__u64 ccd_energy_acc;
|
||||
__u64 xcd_energy_acc;
|
||||
__u64 aid_energy_acc;
|
||||
__u64 hbm_energy_acc;
|
||||
|
||||
/* FREQUENCY */
|
||||
__u32 cclk_frequency_limit;
|
||||
__u32 gfxclk_frequency_limit;
|
||||
__u32 fclk_frequency;
|
||||
__u32 uclk_frequency;
|
||||
__u32 socclk_frequency[4];
|
||||
__u32 vclk_frequency[4];
|
||||
__u32 dclk_frequency[4];
|
||||
__u32 lclk_frequency[4];
|
||||
__u64 gfxclk_frequency_acc[8];
|
||||
__u64 cclk_frequency_acc[96];
|
||||
|
||||
/* FREQUENCY RANGE */
|
||||
__u32 max_cclk_frequency;
|
||||
__u32 min_cclk_frequency;
|
||||
__u32 max_gfxclk_frequency;
|
||||
__u32 min_gfxclk_frequency;
|
||||
__u32 fclk_frequency_table[4];
|
||||
__u32 uclk_frequency_table[4];
|
||||
__u32 socclk_frequency_table[4];
|
||||
__u32 vclk_frequency_table[4];
|
||||
__u32 dclk_frequency_table[4];
|
||||
__u32 lclk_frequency_table[4];
|
||||
__u32 max_lclk_dpm_range;
|
||||
__u32 min_lclk_dpm_range;
|
||||
|
||||
/* XGMI */
|
||||
__u32 xgmi_width;
|
||||
__u32 xgmi_bitrate;
|
||||
__u64 xgmi_read_bandwidth_acc[8];
|
||||
__u64 xgmi_write_bandwidth_acc[8];
|
||||
|
||||
/* ACTIVITY */
|
||||
__u32 socket_c0_residency;
|
||||
__u32 socket_gfx_busy;
|
||||
__u32 dram_bandwidth_utilization;
|
||||
__u64 socket_c0_residency_acc;
|
||||
__u64 socket_gfx_busy_acc;
|
||||
__u64 dram_bandwidth_acc;
|
||||
__u32 max_dram_bandwidth;
|
||||
__u64 dram_bandwidth_utilization_acc;
|
||||
__u64 pcie_bandwidth_acc[4];
|
||||
|
||||
/* THROTTLERS */
|
||||
__u32 prochot_residency_acc;
|
||||
__u32 ppt_residency_acc;
|
||||
__u32 socket_thm_residency_acc;
|
||||
__u32 vr_thm_residency_acc;
|
||||
__u32 hbm_thm_residency_acc;
|
||||
__u32 spare;
|
||||
|
||||
/* New items at the end to maintain driver compatibility */
|
||||
__u32 gfxclk_frequency[8];
|
||||
};
|
||||
|
||||
/* Reset to default packing */
|
||||
#pragma pack()
|
||||
|
||||
/* Define unique ioctl command for hsmp msgs using generic _IOWR */
|
||||
#define HSMP_BASE_IOCTL_NR 0xF8
|
||||
#define HSMP_IOCTL_CMD _IOWR(HSMP_BASE_IOCTL_NR, 0, struct hsmp_message)
|
||||
|
||||
#endif /*_ASM_X86_AMD_HSMP_H_*/
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
|
||||
#define AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
|
||||
|
||||
#include <map>
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
#ifdef ENABLE_ESMI_LIB
|
||||
extern "C" {
|
||||
#include <cstdint>
|
||||
#include <e_smi/e_smi.h>
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
// Define a map of rsmi status codes to amdsmi status codes
|
||||
const std::map<rsmi_status_t, amdsmi_status_t> rsmi_status_map = {
|
||||
{RSMI_STATUS_SUCCESS, AMDSMI_STATUS_SUCCESS},
|
||||
{RSMI_STATUS_INVALID_ARGS, AMDSMI_STATUS_INVAL},
|
||||
{RSMI_STATUS_NOT_SUPPORTED, AMDSMI_STATUS_NOT_SUPPORTED},
|
||||
{RSMI_STATUS_FILE_ERROR, AMDSMI_STATUS_FILE_ERROR},
|
||||
{RSMI_STATUS_PERMISSION, AMDSMI_STATUS_NO_PERM},
|
||||
{RSMI_STATUS_OUT_OF_RESOURCES, AMDSMI_STATUS_OUT_OF_RESOURCES},
|
||||
{RSMI_STATUS_INTERNAL_EXCEPTION, AMDSMI_STATUS_INTERNAL_EXCEPTION},
|
||||
{RSMI_STATUS_INPUT_OUT_OF_BOUNDS, AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS},
|
||||
{RSMI_STATUS_INIT_ERROR, AMDSMI_STATUS_NOT_INIT},
|
||||
{RSMI_INITIALIZATION_ERROR, AMDSMI_STATUS_NOT_INIT},
|
||||
{RSMI_STATUS_NOT_YET_IMPLEMENTED, AMDSMI_STATUS_NOT_YET_IMPLEMENTED},
|
||||
{RSMI_STATUS_NOT_FOUND, AMDSMI_STATUS_NOT_FOUND},
|
||||
{RSMI_STATUS_INSUFFICIENT_SIZE, AMDSMI_STATUS_INSUFFICIENT_SIZE},
|
||||
{RSMI_STATUS_INTERRUPT, AMDSMI_STATUS_INTERRUPT},
|
||||
{RSMI_STATUS_UNEXPECTED_SIZE, AMDSMI_STATUS_UNEXPECTED_SIZE},
|
||||
{RSMI_STATUS_NO_DATA, AMDSMI_STATUS_NO_DATA},
|
||||
{RSMI_STATUS_UNEXPECTED_DATA, AMDSMI_STATUS_UNEXPECTED_DATA},
|
||||
{RSMI_STATUS_BUSY, AMDSMI_STATUS_BUSY},
|
||||
{RSMI_STATUS_REFCOUNT_OVERFLOW, AMDSMI_STATUS_REFCOUNT_OVERFLOW},
|
||||
{RSMI_STATUS_DIRECTORY_NOT_FOUND, AMDSMI_STATUS_DIRECTORY_NOT_FOUND},
|
||||
{RSMI_STATUS_SETTING_UNAVAILABLE, AMDSMI_STATUS_SETTING_UNAVAILABLE},
|
||||
{RSMI_STATUS_AMDGPU_RESTART_ERR, AMDSMI_STATUS_AMDGPU_RESTART_ERR},
|
||||
{RSMI_STATUS_UNKNOWN_ERROR, AMDSMI_STATUS_UNKNOWN_ERROR},
|
||||
};
|
||||
|
||||
const std::map<unsigned, amdsmi_vram_type_t> vram_type_map = {
|
||||
{0, AMDSMI_VRAM_TYPE_UNKNOWN},
|
||||
{1, AMDSMI_VRAM_TYPE_GDDR1},
|
||||
{2, AMDSMI_VRAM_TYPE_DDR2},
|
||||
{3, AMDSMI_VRAM_TYPE_GDDR3},
|
||||
{4, AMDSMI_VRAM_TYPE_GDDR4},
|
||||
{5, AMDSMI_VRAM_TYPE_GDDR5},
|
||||
{6, AMDSMI_VRAM_TYPE_HBM},
|
||||
{7, AMDSMI_VRAM_TYPE_DDR3},
|
||||
{8, AMDSMI_VRAM_TYPE_DDR4},
|
||||
{9, AMDSMI_VRAM_TYPE_GDDR6},
|
||||
{10, AMDSMI_VRAM_TYPE_DDR5},
|
||||
{11, AMDSMI_VRAM_TYPE_LPDDR4},
|
||||
{12, AMDSMI_VRAM_TYPE_LPDDR5},
|
||||
{13, AMDSMI_VRAM_TYPE_HBM3E},
|
||||
};
|
||||
|
||||
amdsmi_status_t rsmi_to_amdsmi_status(rsmi_status_t status);
|
||||
|
||||
amdsmi_vram_type_t vram_type_value(unsigned type);
|
||||
|
||||
#ifdef ENABLE_ESMI_LIB
|
||||
// Define a map of esmi status codes to amdsmi status codes
|
||||
const std::map<esmi_status_t, amdsmi_status_t> esmi_status_map = {
|
||||
{ESMI_SUCCESS, AMDSMI_STATUS_SUCCESS},
|
||||
{ESMI_INITIALIZED, AMDSMI_STATUS_SUCCESS},
|
||||
{ESMI_INVALID_INPUT, AMDSMI_STATUS_INVAL},
|
||||
{ESMI_NOT_SUPPORTED, AMDSMI_STATUS_NOT_SUPPORTED},
|
||||
{ESMI_PERMISSION, AMDSMI_STATUS_NO_PERM},
|
||||
{ESMI_INTERRUPTED, AMDSMI_STATUS_INTERRUPT},
|
||||
{ESMI_IO_ERROR, AMDSMI_STATUS_IO},
|
||||
{ESMI_FILE_ERROR, AMDSMI_STATUS_FILE_ERROR},
|
||||
{ESMI_NO_MEMORY, AMDSMI_STATUS_OUT_OF_RESOURCES},
|
||||
{ESMI_DEV_BUSY, AMDSMI_STATUS_BUSY},
|
||||
{ESMI_NOT_INITIALIZED, AMDSMI_STATUS_NOT_INIT},
|
||||
{ESMI_UNEXPECTED_SIZE, AMDSMI_STATUS_UNEXPECTED_SIZE},
|
||||
{ESMI_UNKNOWN_ERROR, AMDSMI_STATUS_UNKNOWN_ERROR},
|
||||
{ESMI_NO_ENERGY_DRV, AMDSMI_STATUS_NO_ENERGY_DRV},
|
||||
{ESMI_NO_MSR_DRV, AMDSMI_STATUS_NO_MSR_DRV},
|
||||
{ESMI_NO_HSMP_DRV, AMDSMI_STATUS_NO_HSMP_DRV},
|
||||
{ESMI_NO_HSMP_SUP, AMDSMI_STATUS_NO_HSMP_SUP},
|
||||
{ESMI_NO_DRV, AMDSMI_STATUS_NO_DRV},
|
||||
{ESMI_FILE_NOT_FOUND, AMDSMI_STATUS_FILE_NOT_FOUND},
|
||||
{ESMI_ARG_PTR_NULL, AMDSMI_STATUS_ARG_PTR_NULL},
|
||||
{ESMI_HSMP_TIMEOUT, AMDSMI_STATUS_HSMP_TIMEOUT},
|
||||
{ESMI_NO_HSMP_MSG_SUP, AMDSMI_STATUS_NO_HSMP_MSG_SUP},
|
||||
};
|
||||
|
||||
amdsmi_status_t esmi_to_amdsmi_status(esmi_status_t status);
|
||||
#endif
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_COMMON_H_
|
||||
@@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
#pragma pack(1)
|
||||
|
||||
#define CPER_MAX_OAM_COUNT (8)
|
||||
|
||||
enum cper_error_severity {
|
||||
CPER_SEV_FATAL_UNCORRECTED = 0,
|
||||
CPER_SEV_FATAL = 1,
|
||||
CPER_SEV_FATAL_CORRECTED = 2,
|
||||
|
||||
CPER_SEV_UNUSED = 10,
|
||||
};
|
||||
|
||||
enum cper_aca_reg {
|
||||
CPER_ACA_REG_CTL_LO = 0,
|
||||
CPER_ACA_REG_CTL_HI = 1,
|
||||
CPER_ACA_REG_STATUS_LO = 2,
|
||||
CPER_ACA_REG_STATUS_HI = 3,
|
||||
CPER_ACA_REG_ADDR_LO = 4,
|
||||
CPER_ACA_REG_ADDR_HI = 5,
|
||||
CPER_ACA_REG_MISC0_LO = 6,
|
||||
CPER_ACA_REG_MISC0_HI = 7,
|
||||
CPER_ACA_REG_CONFIG_LO = 8,
|
||||
CPER_ACA_REG_CONFIG_HI = 9,
|
||||
CPER_ACA_REG_IPID_LO = 10,
|
||||
CPER_ACA_REG_IPID_HI = 11,
|
||||
CPER_ACA_REG_SYND_LO = 12,
|
||||
CPER_ACA_REG_SYND_HI = 13,
|
||||
|
||||
CPER_ACA_REG_COUNT = 32,
|
||||
};
|
||||
|
||||
struct cper_sec_desc {
|
||||
uint32_t sec_offset; /* Offset from the start of CPER entry */
|
||||
uint32_t sec_length;
|
||||
uint8_t revision_minor; /* CPER_SEC_MINOR_REV_1 */
|
||||
uint8_t revision_major; /* CPER_SEC_MAJOR_REV_22 */
|
||||
union {
|
||||
struct {
|
||||
uint8_t fru_id : 1;
|
||||
uint8_t fru_text : 1;
|
||||
uint8_t reserved : 6;
|
||||
} valid_bits;
|
||||
uint8_t valid_mask;
|
||||
};
|
||||
uint8_t reserved;
|
||||
union {
|
||||
struct {
|
||||
uint32_t primary : 1;
|
||||
uint32_t reserved1 : 2;
|
||||
uint32_t exceed_err_threshold : 1;
|
||||
uint32_t latent_err : 1; /* "Deferred" error Creation*/
|
||||
uint32_t reserved2 : 27;
|
||||
} flags_bits;
|
||||
uint32_t flags_mask;
|
||||
};
|
||||
amdsmi_cper_guid_t sec_type; /* AMD non-Standard, AMD Crashdump */
|
||||
char fru_id[16]; /* FRU Serial ID */
|
||||
amdsmi_cper_sev_t severity;
|
||||
char fru_text[20]; /* "OAM%d" */
|
||||
};
|
||||
|
||||
struct cper_sec_nonstd_err_info {
|
||||
amdsmi_cper_guid_t error_type;
|
||||
union {
|
||||
struct {
|
||||
uint64_t ms_chk : 1;
|
||||
uint64_t target_addr_id : 1;
|
||||
uint64_t req_id : 1;
|
||||
uint64_t resp_id : 1;
|
||||
uint64_t instr_ptr : 1;
|
||||
uint64_t reserved : 59;
|
||||
} valid_bits;
|
||||
uint64_t valid_mask;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
uint64_t err_type_valid : 1;
|
||||
uint64_t pcc_valid : 1;
|
||||
uint64_t uncorr_valid : 1;
|
||||
uint64_t precise_ip_valid : 1;
|
||||
uint64_t restartable_ip_valid : 1;
|
||||
uint64_t overflow_valid : 1;
|
||||
uint64_t reserved1 : 10;
|
||||
|
||||
uint64_t err_type : 2;
|
||||
uint64_t pcc : 1;
|
||||
uint64_t uncorr : 1;
|
||||
uint64_t precised_ip : 1;
|
||||
uint64_t restartable_ip : 1;
|
||||
uint64_t overflow : 1;
|
||||
uint64_t reserved2 : 41;
|
||||
} ms_chk_bits;
|
||||
uint64_t ms_chk_mask;
|
||||
};
|
||||
|
||||
uint64_t target_addr_id;
|
||||
uint64_t req_id;
|
||||
uint64_t resp_id;
|
||||
uint64_t instr_ptr;
|
||||
};
|
||||
|
||||
struct cper_sec_nonstd_err_ctx {
|
||||
uint16_t reg_ctx_type;
|
||||
uint16_t reg_arr_size;
|
||||
uint32_t msr_addr;
|
||||
uint64_t mm_reg_addr;
|
||||
uint32_t reg_dump[CPER_ACA_REG_COUNT]; /* This buffer can grow */
|
||||
};
|
||||
|
||||
struct cper_sec_nonstd_err_hdr {
|
||||
union {
|
||||
struct {
|
||||
uint64_t apic_id : 1;
|
||||
uint64_t fw_id : 1;
|
||||
uint64_t err_info_cnt : 6; /* should match context_cnt */
|
||||
uint64_t err_context_cnt : 6; /* should match info_cnt */
|
||||
} valid_bits;
|
||||
uint64_t valid_mask;
|
||||
};
|
||||
|
||||
uint64_t apic_id;
|
||||
char fw_id[48];
|
||||
};
|
||||
|
||||
struct cper_sec_nonstd_err_body {
|
||||
struct cper_sec_nonstd_err_info err_info;
|
||||
struct cper_sec_nonstd_err_ctx err_ctx;
|
||||
};
|
||||
|
||||
struct cper_sec_nonstd_err {
|
||||
struct cper_sec_nonstd_err_hdr hdr;
|
||||
struct cper_sec_nonstd_err_body body[]; /* Variable Size, today only 1 entry */
|
||||
};
|
||||
|
||||
struct cper_sec_crashdump_data {
|
||||
uint16_t reg_ctx_type;
|
||||
uint16_t reg_arr_size;
|
||||
uint32_t reserved1;
|
||||
uint64_t reserved2;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t status_lo;
|
||||
uint32_t status_hi;
|
||||
uint32_t addr_lo;
|
||||
uint32_t addr_hi;
|
||||
uint32_t ipid_lo;
|
||||
uint32_t ipid_hi;
|
||||
uint32_t synd_lo;
|
||||
uint32_t synd_hi;
|
||||
} fatal_err;
|
||||
|
||||
struct {
|
||||
uint64_t msg[CPER_MAX_OAM_COUNT];
|
||||
} boot_err;
|
||||
} dump;
|
||||
|
||||
};
|
||||
|
||||
struct cper_sec_crashdump {
|
||||
uint64_t reserved1;
|
||||
uint64_t reserved2;
|
||||
char fw_id[48];
|
||||
uint64_t reserved3[8];
|
||||
|
||||
struct cper_sec_crashdump_data data;
|
||||
};
|
||||
|
||||
struct cper_sec {
|
||||
union {
|
||||
struct {
|
||||
uint8_t fru_id : 1;
|
||||
uint8_t fru_text : 1;
|
||||
uint8_t reserved : 6;
|
||||
} valid_bits;
|
||||
uint8_t valid_mask;
|
||||
};
|
||||
|
||||
union {
|
||||
struct cper_sec_crashdump crashdump;
|
||||
struct cper_sec_nonstd_err runtime_err;
|
||||
};
|
||||
};
|
||||
|
||||
/* General CPER record structure */
|
||||
struct cper_1_0 {
|
||||
struct cper_hdr *hdr;
|
||||
struct cper_sec_desc *sec_desc; /* Variable Size */
|
||||
struct cper_sec *sec; /* Variable Size */
|
||||
};
|
||||
|
||||
#pragma pack()
|
||||
|
||||
amdsmi_status_t amdsmi_get_gpu_cper_entries_by_path(const char *amdgpu_ring_cper_file, uint32_t severity_mask,
|
||||
char *cper_data, uint64_t *buf_size, amdsmi_cper_hdr_t **cper_hdrs,
|
||||
uint64_t *entry_count, uint64_t *cursor, uint64_t product_serial);
|
||||
std::vector<int> cper_decode(const amdsmi_cper_hdr_t *cper);
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
|
||||
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <mutex> // NOLINT
|
||||
#include <string>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_lib_loader.h"
|
||||
#include "amd_smi/impl/amdgpu_drm.h"
|
||||
#include "amd_smi/impl/xf86drm.h"
|
||||
#include "amd_smi/impl/scoped_fd.h"
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
class AMDSmiDrm {
|
||||
public:
|
||||
amdsmi_status_t init();
|
||||
amdsmi_status_t cleanup();
|
||||
amdsmi_status_t get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const;
|
||||
amdsmi_status_t get_drm_path_by_index(uint32_t gpu_index, std::string *drm_path) const;
|
||||
std::vector<amdsmi_bdf_t> get_bdfs();
|
||||
std::vector<std::string>& get_drm_paths();
|
||||
bool check_if_drm_is_supported();
|
||||
uint32_t get_vendor_id();
|
||||
|
||||
private:
|
||||
// when file is not found, the empty string will be returned
|
||||
std::string find_file_in_folder(const std::string& folder,
|
||||
const std::string& regex);
|
||||
std::vector<std::string> drm_paths_; // drm path (renderD128 for example)
|
||||
std::vector<amdsmi_bdf_t> drm_bdfs_; // bdf
|
||||
uint32_t vendor_id;
|
||||
|
||||
AMDSmiLibraryLoader lib_loader_; // lazy load libdrm
|
||||
|
||||
std::mutex drm_mutex_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_DRM_H_
|
||||
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
|
||||
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_processor.h"
|
||||
#include "amd_smi/impl/amd_smi_drm.h"
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
|
||||
// PID, amdsmi_proc_info_t
|
||||
using GPUComputeProcessList_t = std::map<amdsmi_process_handle_t, amdsmi_proc_info_t>;
|
||||
using ComputeProcessListClassType_t = uint16_t;
|
||||
|
||||
enum class ComputeProcessListType_t : ComputeProcessListClassType_t
|
||||
{
|
||||
kAllProcesses,
|
||||
kAllProcessesOnDevice,
|
||||
};
|
||||
|
||||
|
||||
class AMDSmiGPUDevice: public AMDSmiProcessor {
|
||||
|
||||
public:
|
||||
AMDSmiGPUDevice(uint32_t gpu_id, std::string path, amdsmi_bdf_t bdf, AMDSmiDrm& drm):
|
||||
AMDSmiProcessor(AMDSMI_PROCESSOR_TYPE_AMD_GPU), gpu_id_(gpu_id), path_(path), bdf_(bdf), drm_(drm) {}
|
||||
|
||||
AMDSmiGPUDevice(uint32_t gpu_id, AMDSmiDrm& drm):
|
||||
AMDSmiProcessor(AMDSMI_PROCESSOR_TYPE_AMD_GPU), gpu_id_(gpu_id), drm_(drm) {
|
||||
if (check_if_drm_is_supported()) this->get_drm_data();
|
||||
}
|
||||
~AMDSmiGPUDevice() {
|
||||
}
|
||||
|
||||
amdsmi_status_t get_drm_data();
|
||||
pthread_mutex_t* get_mutex();
|
||||
uint32_t get_gpu_id() const;
|
||||
uint32_t get_card_id(); // -e feature + we can get card_id for our internal functions
|
||||
uint32_t get_drm_render_minor();  // -e feature; NOTE(review): presumably returns the DRM render minor (drm_render_minor_) for internal use -- confirm against the implementation
|
||||
uint64_t get_kfd_gpu_id(); // Used to decode vram usage for KFD processes
|
||||
std::string& get_gpu_path();
|
||||
amdsmi_bdf_t get_bdf();
|
||||
bool check_if_drm_is_supported() { return drm_.check_if_drm_is_supported(); }
|
||||
uint32_t get_vendor_id();
|
||||
const GPUComputeProcessList_t& amdgpu_get_compute_process_list(ComputeProcessListType_t list_type = ComputeProcessListType_t::kAllProcessesOnDevice);
|
||||
|
||||
|
||||
// New methods for -e feature
|
||||
std::string bdf_to_string() const; // -e feature
|
||||
std::vector<uint64_t> get_bitmask_from_numa_node(int32_t node_id, uint32_t size) const;
|
||||
std::vector<uint64_t> get_bitmask_from_local_cpulist(uint32_t drm_card, uint32_t size) const;
|
||||
|
||||
private:
|
||||
uint32_t gpu_id_;
|
||||
std::string path_;
|
||||
amdsmi_bdf_t bdf_;
|
||||
uint32_t vendor_id_;
|
||||
AMDSmiDrm& drm_;
|
||||
uint32_t card_index_;
|
||||
uint32_t drm_render_minor_;
|
||||
uint64_t kfd_gpu_id_; // Used to decode vram usage for KFD processes
|
||||
GPUComputeProcessList_t compute_process_list_;
|
||||
int32_t get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
|
||||
ComputeProcessListType_t list_type);
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_GPU_DEVICE_H_
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
|
||||
#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
|
||||
#include <dlfcn.h>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <mutex> // NOLINT(build/c++11)
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
|
||||
namespace amd::smi {
|
||||
class AMDSmiLibraryLoader {
|
||||
public:
|
||||
AMDSmiLibraryLoader();
|
||||
|
||||
amdsmi_status_t load(const char* filename);
|
||||
|
||||
template<typename T> amdsmi_status_t load_symbol(T* func_handler,
|
||||
const char* func_name);
|
||||
|
||||
|
||||
amdsmi_status_t unload();
|
||||
|
||||
~AMDSmiLibraryLoader();
|
||||
|
||||
private:
|
||||
void* libHandler_;
|
||||
std::mutex library_mutex_;
|
||||
bool library_loaded_ = false;
|
||||
};
|
||||
|
||||
// Looks up func_name in the loaded library and writes the resulting address
// into *func_handler.
//
// Returns:
//   AMDSMI_STATUS_FAIL_LOAD_MODULE  - no library handle is available.
//   AMDSMI_STATUS_FAIL_LOAD_SYMBOL  - func_handler/func_name is null, or the
//                                     lookup produced a null address.
//   AMDSMI_STATUS_SUCCESS           - *func_handler now holds the symbol.
template<typename T> amdsmi_status_t AMDSmiLibraryLoader::load_symbol(
                    T* func_handler,
                    const char* func_name) {
    // Take the lock before looking at libHandler_ so the handle cannot change
    // between the check and the dlsym() call below.
    std::lock_guard<std::mutex> guard(library_mutex_);

    if (!libHandler_) {
        return AMDSMI_STATUS_FAIL_LOAD_MODULE;
    }

    if (!func_handler || !func_name) {
        return AMDSMI_STATUS_FAIL_LOAD_SYMBOL;
    }

    // Discard any stale error so the dlerror() call below reports on this
    // lookup only.
    dlerror();

    *reinterpret_cast<void**>(func_handler) =
                dlsym(libHandler_, func_name);
    if (*func_handler == nullptr) {
        // dlerror() returns NULL when no error is pending; streaming a null
        // char* is undefined behavior, so substitute a fixed message.
        const char* error = dlerror();
        std::cerr << "AMDSmiLibraryLoader: Fail to load the symbol "
                  << func_name << ": " << (error ? error : "unknown error")
                  << std::endl;
        return AMDSMI_STATUS_FAIL_LOAD_SYMBOL;
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace amd::smi
|
||||
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
|
||||
#define AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
|
||||
|
||||
#include <string>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
class AMDSmiProcessor {
|
||||
public:
|
||||
explicit AMDSmiProcessor(processor_type_t type) : processor_type_(type) {}
|
||||
explicit AMDSmiProcessor(processor_type_t type, uint32_t index) : processor_type_(type), pindex_(index) {}
|
||||
explicit AMDSmiProcessor(const std::string& id) : processor_identifier_(id) {}
|
||||
virtual ~AMDSmiProcessor() {}
|
||||
processor_type_t get_processor_type() const { return processor_type_;}
|
||||
const std::string& get_processor_id() const { return processor_identifier_;}
|
||||
uint32_t get_processor_index() const { return pindex_;}
|
||||
private:
|
||||
processor_type_t processor_type_;
|
||||
uint32_t pindex_;
|
||||
std::string processor_identifier_;
|
||||
};
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_PROCESSOR_H_
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
|
||||
#define AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_processor.h"
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
class AMDSmiSocket {
|
||||
public:
|
||||
explicit AMDSmiSocket(const std::string& id) : socket_identifier_(id) {}
|
||||
explicit AMDSmiSocket(uint32_t index) : sindex_(index) {}
|
||||
~AMDSmiSocket();
|
||||
const std::string& get_socket_id() const { return socket_identifier_;}
|
||||
uint32_t get_socket_index() { return sindex_;}
|
||||
void add_processor(AMDSmiProcessor* processor) {
|
||||
switch (processor->get_processor_type()) {
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_GPU:
|
||||
processors_.push_back(processor);
|
||||
break;
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_CPU:
|
||||
cpu_processors_.push_back(processor);
|
||||
break;
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE:
|
||||
cpu_core_processors_.push_back(processor);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
std::vector<AMDSmiProcessor*>& get_processors() { return processors_;}
|
||||
std::vector<AMDSmiProcessor*>& get_processors(processor_type_t type) {
|
||||
switch (type) {
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_GPU:
|
||||
return processors_;
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_CPU:
|
||||
return cpu_processors_;
|
||||
case AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE:
|
||||
return cpu_core_processors_;
|
||||
default:
|
||||
return processors_;
|
||||
}
|
||||
}
|
||||
amdsmi_status_t get_processor_count(uint32_t* processor_count) const;
|
||||
amdsmi_status_t get_processor_count(processor_type_t type, uint32_t* processor_count) const;
|
||||
private:
|
||||
uint32_t sindex_;
|
||||
std::string socket_identifier_;
|
||||
std::vector<AMDSmiProcessor*> processors_;
|
||||
std::vector<AMDSmiProcessor*> cpu_processors_;
|
||||
std::vector<AMDSmiProcessor*> cpu_core_processors_;
|
||||
};
|
||||
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_SOCKET_H_
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
|
||||
#define AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_socket.h"
|
||||
#include "amd_smi/impl/amd_smi_processor.h"
|
||||
#include "amd_smi/impl/amd_smi_drm.h"
|
||||
|
||||
namespace amd::smi {
|
||||
|
||||
// Singleton: Only one system in an application
|
||||
class AMDSmiSystem {
|
||||
public:
|
||||
static AMDSmiSystem& getInstance() {
|
||||
static AMDSmiSystem instance;
|
||||
return instance;
|
||||
}
|
||||
amdsmi_status_t init(uint64_t flags);
|
||||
amdsmi_status_t cleanup();
|
||||
|
||||
std::vector<AMDSmiSocket*>& get_sockets() {return sockets_;}
|
||||
|
||||
amdsmi_status_t handle_to_socket(amdsmi_socket_handle socket_handle,
|
||||
AMDSmiSocket** socket);
|
||||
|
||||
amdsmi_status_t handle_to_processor(amdsmi_processor_handle processor_handle,
|
||||
AMDSmiProcessor** device);
|
||||
|
||||
amdsmi_status_t gpu_index_to_handle(uint32_t gpu_index,
|
||||
amdsmi_processor_handle* processor_handle);
|
||||
|
||||
amdsmi_status_t get_cpu_family(uint32_t *cpu_family);
|
||||
|
||||
amdsmi_status_t get_cpu_model(uint32_t *cpu_model);
|
||||
|
||||
amdsmi_status_t get_cpu_model_name(uint32_t socket_id, std::string *model_name);
|
||||
|
||||
amdsmi_status_t get_sys_cpu_cores_per_socket(uint32_t *core_num) ;
|
||||
|
||||
amdsmi_status_t get_sys_num_of_cpu_sockets(uint32_t *sock_num);
|
||||
|
||||
std::vector<uint32_t> get_cpu_sockets_from_numa_node(int32_t numa_node);
|
||||
private:
|
||||
AMDSmiSystem() : init_flag_(AMDSMI_INIT_AMD_GPUS) {}
|
||||
|
||||
/* The GPU socket id is used to identify the socket, so that the XCDs
|
||||
on the same physical device will be collected under the same socket.
|
||||
The BD part of the BDF is used as GPU socket to represent a phyiscal device.
|
||||
*/
|
||||
amdsmi_status_t get_gpu_socket_id(uint32_t index, std::string& socketid);
|
||||
amdsmi_status_t populate_amd_gpu_devices();
|
||||
amdsmi_status_t populate_amd_cpus();
|
||||
uint64_t init_flag_;
|
||||
AMDSmiDrm drm_;
|
||||
std::vector<AMDSmiSocket*> sockets_;
|
||||
std::set<AMDSmiProcessor*> processors_; // Track valid processors
|
||||
};
|
||||
} // namespace amd::smi
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_SYSTEM_H_
|
||||
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
|
||||
#define AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
|
||||
|
||||
#include <dirent.h>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
#include <string>
|
||||
|
||||
#include "amd_smi/amdsmi.h"
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
|
||||
|
||||
// Expands, in the caller's scope, to two locals - `_pw`, a pthread_wrap built
// from the mutex that MUTEX points to, and `_lock`, a ScopedPthread built from
// `_pw` - followed by an early `return AMDSMI_STATUS_BUSY` when
// `_lock.mutex_not_acquired()` is true.
// Consequences visible from the expansion: it can only be used inside a
// function whose return type accepts AMDSMI_STATUS_BUSY, and at most once per
// scope because the local names are fixed.
// NOTE(review): presumably `_lock` releases the mutex when it goes out of
// scope and the `true` argument selects a non-blocking acquire - confirm
// against the ScopedPthread definition.
#define SMIGPUDEVICE_MUTEX(MUTEX) \
|
||||
amd::smi::pthread_wrap _pw(*(MUTEX)); \
|
||||
amd::smi::ScopedPthread _lock(_pw, true); \
|
||||
if (_lock.mutex_not_acquired()) { \
|
||||
return AMDSMI_STATUS_BUSY; \
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
void amdsmi_free_name_value_pairs(void *p);
|
||||
}
|
||||
|
||||
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice* device, std::string* full_path);
|
||||
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info);
|
||||
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, uint32_t sensor_ind, int *cap);
|
||||
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain, int *max_freq, int *min_freq, int *num_dpm, int *sleep_state_freq);
|
||||
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks);
|
||||
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device, uint32_t *num_pages, amdsmi_retired_page_record_t *info);
|
||||
amdsmi_status_t smi_amdgpu_get_bad_page_threshold(amd::smi::AMDSmiGPUDevice* device, uint32_t *threshold);
|
||||
amdsmi_status_t smi_amdgpu_validate_ras_eeprom(amd::smi::AMDSmiGPUDevice* device);
|
||||
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt);
|
||||
amdsmi_status_t smi_amdgpu_get_driver_version(amd::smi::AMDSmiGPUDevice* device, int *length, char *version);
|
||||
amdsmi_status_t smi_amdgpu_get_pcie_speed_from_pcie_type(uint16_t pcie_type, uint32_t *pcie_speed);
|
||||
amdsmi_status_t smi_amdgpu_get_market_name_from_dev_id(amd::smi::AMDSmiGPUDevice* device, char *market_name);
|
||||
amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDevice* device, bool *enabled);
|
||||
std::string smi_split_string(std::string str, char delim);
|
||||
std::string smi_amdgpu_get_status_string(amdsmi_status_t ret, bool fullStatus);
|
||||
amdsmi_status_t smi_clear_char_and_reinitialize(char buffer[], uint32_t len,
|
||||
std::string newString);
|
||||
|
||||
/**
|
||||
* @brief Get the device index given the processor handle.
|
||||
*
|
||||
* @details Given a processor handle @p processor_handle
|
||||
* and a pointer to a uint32_t @p device_index will be returned.
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[inout] device_index a pointer to uint32_t to which the matching device
|
||||
* index will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for device_index.
|
||||
* ::AMDSMI_STATUS_API_FAILED is returned if the corresponding device
|
||||
* index for the processor handle cannot be found.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_device_index(amdsmi_processor_handle processor_handle,
|
||||
uint32_t* device_index);
|
||||
|
||||
/**
|
||||
* @brief Get total number of devices
|
||||
*
|
||||
* @details Given a pointer to a uint32_t @p total_num_devices will be returned
|
||||
*
|
||||
* @param[inout] total_num_devices a pointer to uint32_t to which the total number
|
||||
* of devices will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for total_num_devices.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_device_count(uint32_t *total_num_devices);
|
||||
|
||||
/**
|
||||
* @brief Get the processor handle given the device index.
|
||||
*
|
||||
* @details Given a uint32_t @p device_index and a pointer to
|
||||
* a processor handle @p processor_handle, the device index will be used to
|
||||
* find the processor handle of the device and store it in the provided pointer
|
||||
*
|
||||
* @param[in] device_index a uint32_t value used to find the corresponding
|
||||
* processor handle
|
||||
*
|
||||
* @param[inout] processor_handle a pointer to amdsmi_processor_handle
|
||||
* which the corresponding processor_handle will be stored
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
* ::AMDSMI_STATUS_INVAL is returned if user provides a null pointer
|
||||
* for processor_handle.
|
||||
* ::AMDSMI_STATUS_API_FAILED is returned if the device_index cannot
|
||||
* be found.
|
||||
*/
|
||||
amdsmi_status_t smi_amdgpu_get_processor_handle_by_index(
|
||||
uint32_t device_index,
|
||||
amdsmi_processor_handle *processor_handle);
|
||||
|
||||
/**
|
||||
* @brief Get an int environment var or return default if does not exist
|
||||
*
|
||||
* @details Given a const char* @p name and a default int @p def,
|
||||
* calls getenv with name. On any error, returns the default int
|
||||
*
|
||||
* @param[in] name a const char* containing ENV var name
|
||||
*
|
||||
* @param[in] def default int in case of error
|
||||
*
|
||||
* @retval int of environment variable
|
||||
*/
|
||||
int read_env_ms(const char* name, int def);
|
||||
|
||||
// Always false, but dependent on a template parameter so that a
// static_assert using it only fires when the enclosing template is
// instantiated.
template<typename>
constexpr bool is_dependent_false_v = false;

// True when T, ignoring references and cv-qualifiers, is one of the
// fixed-width unsigned integer types uint8_t / uint16_t / uint32_t / uint64_t.
template<typename T>
inline constexpr bool is_supported_type_v =
    std::is_same_v<std::decay_t<T>, std::uint8_t>  ||
    std::is_same_v<std::decay_t<T>, std::uint16_t> ||
    std::is_same_v<std::decay_t<T>, std::uint32_t> ||
    std::is_same_v<std::decay_t<T>, std::uint64_t>;
|
||||
|
||||
template<typename T>
|
||||
constexpr T get_std_num_limit()
|
||||
{
|
||||
if constexpr (is_supported_type_v<T>) {
|
||||
return std::numeric_limits<T>::max();
|
||||
} else {
|
||||
return std::numeric_limits<T>::min();
|
||||
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
constexpr bool is_std_num_limit(T value)
|
||||
{
|
||||
return (value == get_std_num_limit<T>());
|
||||
}
|
||||
|
||||
template<typename T, typename U, typename V = T>
|
||||
constexpr T translate_umax_or_assign_value(U source_value, V target_value)
|
||||
{
|
||||
T result{};
|
||||
if constexpr (is_supported_type_v<T> && is_supported_type_v<U>) {
|
||||
// If the source value is uint<U>::max(), then return is uint<T>::max()
|
||||
if (is_std_num_limit(source_value)) {
|
||||
result = get_std_num_limit<T>();
|
||||
} else {
|
||||
result = static_cast<T>(target_value);
|
||||
}
|
||||
|
||||
return result;
|
||||
} else {
|
||||
static_assert(is_dependent_false_v<T>, "Error: Type not supported...");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Assigns value to every element of a two-dimensional container: arr must be
// iterable, and so must each of its rows.
// Written with plain range-for loops so it does not depend on <algorithm> or
// <iterator>, neither of which this header includes.
template<typename A, typename T>
void fill_2d_array(A& arr, T value) {
    for (auto& row : arr) {
        for (auto& cell : row) {
            cell = value;
        }
    }
}
|
||||
|
||||
/**
|
||||
* @brief Get the product serial number given the processor handle.
|
||||
*
|
||||
* @param[in] processor_handle handle of the processor whose product
|
||||
* serial number is queried
|
||||
*
|
||||
* @retval ::The serial number
|
||||
* ::0 if it cannot be determined
|
||||
*/
|
||||
uint64_t get_product_serial_number(amdsmi_processor_handle processor_handle);
|
||||
|
||||
#endif // AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef GPUVSMI_UUID_H_
|
||||
#define GPUVSMI_UUID_H_
|
||||
|
||||
/**
|
||||
* \brief Generates uuid for device with specified parameters
|
||||
*
|
||||
* \param [out] str String buffer where to output generated uuid
|
||||
*
|
||||
* \param [in] serial Asic serial
|
||||
*
|
||||
* \param [in] did Device ID
|
||||
*
|
||||
* \param [in] idx PF/VF index
|
||||
*
|
||||
* \return amdsmi_status_t indicating result.
|
||||
*/
|
||||
amdsmi_status_t amdsmi_uuid_gen(char *str, uint64_t serial, uint16_t did, uint8_t idx);
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user